code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield="name")
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _MakeLegacyNodeInfo(data):
 586   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 587
 588   Converts the data into a single dictionary. This is fine for most use cases,
 589   but some require information from more than one volume group or hypervisor.
 590
 591   """
 592   (bootid, (vg_info, ), (hv_info, )) = data
 593
 594   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 595     "bootid": bootid,
 596     })
 597
 598
 599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 600   """Checks if the owned node groups are still correct for an instance.
 601
 602   @type cfg: L{config.ConfigWriter}
 603   @param cfg: The cluster configuration
 604   @type instance_name: string
 605   @param instance_name: Instance name
 606   @type owned_groups: set or frozenset
 607   @param owned_groups: List of currently owned node groups
 608
 609   """
 610   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 611
 612   if not owned_groups.issuperset(inst_groups):
 613     raise errors.OpPrereqError("Instance %s's node groups changed since"
 614                                " locks were acquired, current groups are"
 615                                " are '%s', owning groups '%s'; retry the"
 616                                " operation" %
 617                                (instance_name,
 618                                 utils.CommaJoin(inst_groups),
 619                                 utils.CommaJoin(owned_groups)),
 620                                errors.ECODE_STATE)
 621
 622   return inst_groups
 623
 624
 625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 626   """Checks if the instances in a node group are still correct.
 627
 628   @type cfg: L{config.ConfigWriter}
 629   @param cfg: The cluster configuration
 630   @type group_uuid: string
 631   @param group_uuid: Node group UUID
 632   @type owned_instances: set or frozenset
 633   @param owned_instances: List of currently owned instances
 634
 635   """
 636   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 637   if owned_instances != wanted_instances:
 638     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 639                                " locks were acquired, wanted '%s', have '%s';"
 640                                " retry the operation" %
 641                                (group_uuid,
 642                                 utils.CommaJoin(wanted_instances),
 643                                 utils.CommaJoin(owned_instances)),
 644                                errors.ECODE_STATE)
 645
 646   return wanted_instances
 647
 648
 649 def _SupportsOob(cfg, node):
 650   """Tells if node supports OOB.
 651
 652   @type cfg: L{config.ConfigWriter}
 653   @param cfg: The cluster configuration
 654   @type node: L{objects.Node}
 655   @param node: The node
 656   @return: The OOB script if supported or an empty string otherwise
 657
 658   """
 659   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 660
 661
 662 def _GetWantedNodes(lu, nodes):
 663   """Returns list of checked and expanded node names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type nodes: list
 668   @param nodes: list of node names or None for all nodes
 669   @rtype: list
 670   @return: the list of nodes, sorted
 671   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 672
 673   """
 674   if nodes:
 675     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 676
 677   return utils.NiceSort(lu.cfg.GetNodeList())
 678
 679
 680 def _GetWantedInstances(lu, instances):
 681   """Returns list of checked and expanded instance names.
 682
 683   @type lu: L{LogicalUnit}
 684   @param lu: the logical unit on whose behalf we execute
 685   @type instances: list
 686   @param instances: list of instance names or None for all instances
 687   @rtype: list
 688   @return: the list of instances, sorted
 689   @raise errors.OpPrereqError: if the instances parameter is wrong type
 690   @raise errors.OpPrereqError: if any of the passed instances is not found
 691
 692   """
 693   if instances:
 694     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 695   else:
 696     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 697   return wanted
 698
 699
 700 def _GetUpdatedParams(old_params, update_dict,
 701                       use_default=True, use_none=False):
 702   """Return the new version of a parameter dictionary.
 703
 704   @type old_params: dict
 705   @param old_params: old parameters
 706   @type update_dict: dict
 707   @param update_dict: dict containing new parameter values, or
 708       constants.VALUE_DEFAULT to reset the parameter to its default
 709       value
 710   @param use_default: boolean
 711   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 712       values as 'to be deleted' values
 713   @param use_none: boolean
 714   @type use_none: whether to recognise C{None} values as 'to be
 715       deleted' values
 716   @rtype: dict
 717   @return: the new parameter dictionary
 718
 719   """
 720   params_copy = copy.deepcopy(old_params)
 721   for key, val in update_dict.iteritems():
 722     if ((use_default and val == constants.VALUE_DEFAULT) or
 723         (use_none and val is None)):
 724       try:
 725         del params_copy[key]
 726       except KeyError:
 727         pass
 728     else:
 729       params_copy[key] = val
 730   return params_copy
 731
 732
 733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 734   """Return the new version of a instance policy.
 735
 736   @param group_policy: whether this policy applies to a group and thus
 737     we should support removal of policy entries
 738
 739   """
 740   use_none = use_default = group_policy
 741   ipolicy = copy.deepcopy(old_ipolicy)
 742   for key, value in new_ipolicy.items():
 743     if key not in constants.IPOLICY_ALL_KEYS:
 744       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 745                                  errors.ECODE_INVAL)
 746     if key in constants.IPOLICY_ISPECS:
 747       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 748       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 749                                        use_none=use_none,
 750                                        use_default=use_default)
 751     else:
 752       if not value or value == [constants.VALUE_DEFAULT]:
 753         if group_policy:
 754           del ipolicy[key]
 755         else:
 756           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 757                                      " on the cluster'" % key,
 758                                      errors.ECODE_INVAL)
 759       else:
 760         if key in constants.IPOLICY_PARAMETERS:
 761           # FIXME: we assume all such values are float
 762           try:
 763             ipolicy[key] = float(value)
 764           except (TypeError, ValueError), err:
 765             raise errors.OpPrereqError("Invalid value for attribute"
 766                                        " '%s': '%s', error: %s" %
 767                                        (key, value, err), errors.ECODE_INVAL)
 768         else:
 769           # FIXME: we assume all others are lists; this should be redone
 770           # in a nicer way
 771           ipolicy[key] = list(value)
 772   try:
 773     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 774   except errors.ConfigurationError, err:
 775     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 776                                errors.ECODE_INVAL)
 777   return ipolicy
 778
 779
 780 def _UpdateAndVerifySubDict(base, updates, type_check):
 781   """Updates and verifies a dict with sub dicts of the same type.
 782
 783   @param base: The dict with the old data
 784   @param updates: The dict with the new data
 785   @param type_check: Dict suitable to ForceDictType to verify correct types
 786   @returns: A new dict with updated and verified values
 787
 788   """
 789   def fn(old, value):
 790     new = _GetUpdatedParams(old, value)
 791     utils.ForceDictType(new, type_check)
 792     return new
 793
 794   ret = copy.deepcopy(base)
 795   ret.update(dict((key, fn(base.get(key, {}), value))
 796                   for key, value in updates.items()))
 797   return ret
 798
 799
 800 def _MergeAndVerifyHvState(op_input, obj_input):
 801   """Combines the hv state from an opcode with the one of the object
 802
 803   @param op_input: The input dict from the opcode
 804   @param obj_input: The input dict from the objects
 805   @return: The verified and updated dict
 806
 807   """
 808   if op_input:
 809     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 810     if invalid_hvs:
 811       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 812                                  " %s" % utils.CommaJoin(invalid_hvs),
 813                                  errors.ECODE_INVAL)
 814     if obj_input is None:
 815       obj_input = {}
 816     type_check = constants.HVSTS_PARAMETER_TYPES
 817     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 818
 819   return None
 820
 821
 822 def _MergeAndVerifyDiskState(op_input, obj_input):
 823   """Combines the disk state from an opcode with the one of the object
 824
 825   @param op_input: The input dict from the opcode
 826   @param obj_input: The input dict from the objects
 827   @return: The verified and updated dict
 828   """
 829   if op_input:
 830     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 831     if invalid_dst:
 832       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 833                                  utils.CommaJoin(invalid_dst),
 834                                  errors.ECODE_INVAL)
 835     type_check = constants.DSS_PARAMETER_TYPES
 836     if obj_input is None:
 837       obj_input = {}
 838     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 839                                               type_check))
 840                 for key, value in op_input.items())
 841
 842   return None
 843
 844
 845 def _ReleaseLocks(lu, level, names=None, keep=None):
 846   """Releases locks owned by an LU.
 847
 848   @type lu: L{LogicalUnit}
 849   @param level: Lock level
 850   @type names: list or None
 851   @param names: Names of locks to release
 852   @type keep: list or None
 853   @param keep: Names of locks to retain
 854
 855   """
 856   assert not (keep is not None and names is not None), \
 857          "Only one of the 'names' and the 'keep' parameters can be given"
 858
 859   if names is not None:
 860     should_release = names.__contains__
 861   elif keep:
 862     should_release = lambda name: name not in keep
 863   else:
 864     should_release = None
 865
 866   owned = lu.owned_locks(level)
 867   if not owned:
 868     # Not owning any lock at this level, do nothing
 869     pass
 870
 871   elif should_release:
 872     retain = []
 873     release = []
 874
 875     # Determine which locks to release
 876     for name in owned:
 877       if should_release(name):
 878         release.append(name)
 879       else:
 880         retain.append(name)
 881
 882     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 883
 884     # Release just some locks
 885     lu.glm.release(level, names=release)
 886
 887     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 888   else:
 889     # Release everything
 890     lu.glm.release(level)
 891
 892     assert not lu.glm.is_owned(level), "No locks should be owned"
 893
 894
 895 def _MapInstanceDisksToNodes(instances):
 896   """Creates a map from (node, volume) to instance name.
 897
 898   @type instances: list of L{objects.Instance}
 899   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 900
 901   """
 902   return dict(((node, vol), inst.name)
 903               for inst in instances
 904               for (node, vols) in inst.MapLVsByNode().items()
 905               for vol in vols)
 906
 907
 908 def _RunPostHook(lu, node_name):
 909   """Runs the post-hook for an opcode on a single node.
 910
 911   """
 912   hm = lu.proc.BuildHooksManager(lu)
 913   try:
 914     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 915   except:
 916     # pylint: disable=W0702
 917     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 918
 919
 920 def _CheckOutputFields(static, dynamic, selected):
 921   """Checks whether all selected fields are valid.
 922
 923   @type static: L{utils.FieldSet}
 924   @param static: static fields set
 925   @type dynamic: L{utils.FieldSet}
 926   @param dynamic: dynamic fields set
 927
 928   """
 929   f = utils.FieldSet()
 930   f.Extend(static)
 931   f.Extend(dynamic)
 932
 933   delta = f.NonMatching(selected)
 934   if delta:
 935     raise errors.OpPrereqError("Unknown output fields selected: %s"
 936                                % ",".join(delta), errors.ECODE_INVAL)
 937
 938
 939 def _CheckGlobalHvParams(params):
 940   """Validates that given hypervisor params are not global ones.
 941
 942   This will ensure that instances don't get customised versions of
 943   global params.
 944
 945   """
 946   used_globals = constants.HVC_GLOBALS.intersection(params)
 947   if used_globals:
 948     msg = ("The following hypervisor parameters are global and cannot"
 949            " be customized at instance level, please modify them at"
 950            " cluster level: %s" % utils.CommaJoin(used_globals))
 951     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 952
 953
 954 def _CheckNodeOnline(lu, node, msg=None):
 955   """Ensure that a given node is online.
 956
 957   @param lu: the LU on behalf of which we make the check
 958   @param node: the node to check
 959   @param msg: if passed, should be a message to replace the default one
 960   @raise errors.OpPrereqError: if the node is offline
 961
 962   """
 963   if msg is None:
 964     msg = "Can't use offline node"
 965   if lu.cfg.GetNodeInfo(node).offline:
 966     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 967
 968
 969 def _CheckNodeNotDrained(lu, node):
 970   """Ensure that a given node is not drained.
 971
 972   @param lu: the LU on behalf of which we make the check
 973   @param node: the node to check
 974   @raise errors.OpPrereqError: if the node is drained
 975
 976   """
 977   if lu.cfg.GetNodeInfo(node).drained:
 978     raise errors.OpPrereqError("Can't use drained node %s" % node,
 979                                errors.ECODE_STATE)
 980
 981
 982 def _CheckNodeVmCapable(lu, node):
 983   """Ensure that a given node is vm capable.
 984
 985   @param lu: the LU on behalf of which we make the check
 986   @param node: the node to check
 987   @raise errors.OpPrereqError: if the node is not vm capable
 988
 989   """
 990   if not lu.cfg.GetNodeInfo(node).vm_capable:
 991     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 992                                errors.ECODE_STATE)
 993
 994
 995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 996   """Ensure that a node supports a given OS.
 997
 998   @param lu: the LU on behalf of which we make the check
 999   @param node: the node to check
1000   @param os_name: the OS to query about
1001   @param force_variant: whether to ignore variant errors
1002   @raise errors.OpPrereqError: if the node is not supporting the OS
1003
1004   """
1005   result = lu.rpc.call_os_get(node, os_name)
1006   result.Raise("OS '%s' not in supported OS list for node %s" %
1007                (os_name, node),
1008                prereq=True, ecode=errors.ECODE_INVAL)
1009   if not force_variant:
1010     _CheckOSVariant(result.payload, os_name)
1011
1012
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Verifies via RPC that a node owns the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if result.payload:
    # The node confirmed it has the address
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1038
1039
def _GetClusterDomainSecret():
  """Returns the cluster domain secret as read from its file.

  The secret is read from L{constants.CLUSTER_DOMAIN_SECRET_FILE} using
  L{utils.ReadOneLineFile} in strict mode.

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1046
1047
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Verifies that an instance is in one of the required admin states.

  If L{constants.ADMINST_UP} is not among the required states, the
  primary node is additionally asked for its running instances and the
  check fails if the instance is found among them.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  admin_state = instance.admin_state
  if admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1073
1074
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Checks a value against the min/max bounds of an instance policy.

  A bound which the policy does not define for C{name} defaults to the
  value itself, so it can never be violated. Values of C{None} or
  L{constants.VALUE_AUTO} are not checked at all.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None if the value is acceptable, otherwise a message describing
      the violation

  """
  if value in [None, constants.VALUE_AUTO]:
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  out_of_range = value > upper or lower > value
  if not out_of_range:
    return None

  return ("%s value %s is not in range [%s, %s]" %
          (name, value, lower, upper))
1093
1094
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Checks a set of instance specs against an instance policy.

  Memory size, cpu count, disk count, nic count and every single disk
  size are each passed through C{_compute_fn}; all non-empty results are
  collected.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ]
  # Every disk is checked on its own against the disk size bounds
  for size in disk_sizes:
    test_settings.append((constants.ISPEC_DISK_SIZE, size))

  violations = []
  for (name, value) in test_settings:
    res = _compute_fn(name, ipolicy, value)
    if res:
      violations.append(res)
  return violations
1128
1129
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Checks an existing instance against an instance policy.

  The specs are extracted from the instance's backend parameters
  (maximum memory and vcpus), its disks and its nics.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns for the extracted specs
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks])
1150
1151
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Checks an instance spec dict against an instance policy.

  Missing memory size and cpu count default to C{None}, missing disk and
  nic counts to zero, and missing disk sizes to an empty list.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns for the extracted specs
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  return _compute_fn(ipolicy,
                     spec(constants.ISPEC_MEM_SIZE, None),
                     spec(constants.ISPEC_CPU_COUNT, None),
                     spec(constants.ISPEC_DISK_COUNT, 0),
                     spec(constants.ISPEC_NIC_COUNT, 0),
                     spec(constants.ISPEC_DISK_SIZE, []))
1172
1173
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Checks an instance against the policy of a new target group.

  No check is done if the instance stays within its current group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list if both groups are equal, otherwise the result
      of C{_compute_fn}
  @see: L{_ComputeIPolicySpecViolation}

  """
  stays_in_group = current_group == target_group
  if stays_in_group:
    return []
  return _compute_fn(ipolicy, instance)
1191
1192
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Verifies an instance against the policy of a target node's group.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: If set, violations are only logged as a warning
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if violations are found and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  lu.LogWarning(msg)
1215
1216
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes the instances which only the new ipolicy would reject.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A frozenset with the names of the instances which violate the
      new ipolicy but did not violate the old one

  """
  # Newly violating means: rejected by the new policy, minus whatever was
  # already rejected by the old one (not the other way around)
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1228
1229
1230 def _ExpandItemName(fn, name, kind):
1231   """Expand an item name.
1232
1233   @param fn: the function to use for expansion
1234   @param name: requested item name
1235   @param kind: text description ('Node' or 'Instance')
1236   @return: the resolved (full) name
1237   @raise errors.OpPrereqError: if the item is not found
1238
1239   """
1240   full_name = fn(name)
1241   if full_name is None:
1242     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1243                                errors.ECODE_NOENT)
1244   return full_name
1245
1246
def _ExpandNodeName(cfg, name):
  """Expands a node name through L{_ExpandItemName}.

  @param cfg: the configuration object providing C{ExpandNodeName}
  @param name: requested node name

  """
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1250
1251
def _ExpandInstanceName(cfg, name):
  """Expands an instance name through L{_ExpandItemName}.

  @param cfg: the configuration object providing C{ExpandInstanceName}
  @param name: requested instance name

  """
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1255
1256
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258                           minmem, maxmem, vcpus, nics, disk_template, disks,
1259                           bep, hvp, hypervisor_name, tags):
1260   """Builds instance related env variables for hooks
1261
1262   This builds the hook environment from individual variables.
1263
1264   @type name: string
1265   @param name: the name of the instance
1266   @type primary_node: string
1267   @param primary_node: the name of the instance's primary node
1268   @type secondary_nodes: list
1269   @param secondary_nodes: list of secondary nodes as strings
1270   @type os_type: string
1271   @param os_type: the name of the instance's OS
1272   @type status: string
1273   @param status: the desired status of the instance
1274   @type minmem: string
1275   @param minmem: the minimum memory size of the instance
1276   @type maxmem: string
1277   @param maxmem: the maximum memory size of the instance
1278   @type vcpus: string
1279   @param vcpus: the count of VCPUs the instance has
1280   @type nics: list
1281   @param nics: list of tuples (ip, mac, mode, link) representing
1282       the NICs the instance has
1283   @type disk_template: string
1284   @param disk_template: the disk template of the instance
1285   @type disks: list
1286   @param disks: the list of (size, mode) pairs
1287   @type bep: dict
1288   @param bep: the backend parameters for the instance
1289   @type hvp: dict
1290   @param hvp: the hypervisor parameters for the instance
1291   @type hypervisor_name: string
1292   @param hypervisor_name: the hypervisor for the instance
1293   @type tags: list
1294   @param tags: list of instance tags as strings
1295   @rtype: dict
1296   @return: the hook environment for this instance
1297
1298   """
1299   env = {
1300     "OP_TARGET": name,
1301     "INSTANCE_NAME": name,
1302     "INSTANCE_PRIMARY": primary_node,
1303     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304     "INSTANCE_OS_TYPE": os_type,
1305     "INSTANCE_STATUS": status,
1306     "INSTANCE_MINMEM": minmem,
1307     "INSTANCE_MAXMEM": maxmem,
1308     # TODO(2.7) remove deprecated "memory" value
1309     "INSTANCE_MEMORY": maxmem,
1310     "INSTANCE_VCPUS": vcpus,
1311     "INSTANCE_DISK_TEMPLATE": disk_template,
1312     "INSTANCE_HYPERVISOR": hypervisor_name,
1313   }
1314   if nics:
1315     nic_count = len(nics)
1316     for idx, (ip, mac, mode, link) in enumerate(nics):
1317       if ip is None:
1318         ip = ""
1319       env["INSTANCE_NIC%d_IP" % idx] = ip
1320       env["INSTANCE_NIC%d_MAC" % idx] = mac
1321       env["INSTANCE_NIC%d_MODE" % idx] = mode
1322       env["INSTANCE_NIC%d_LINK" % idx] = link
1323       if mode == constants.NIC_MODE_BRIDGED:
1324         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1325   else:
1326     nic_count = 0
1327
1328   env["INSTANCE_NIC_COUNT"] = nic_count
1329
1330   if disks:
1331     disk_count = len(disks)
1332     for idx, (size, mode) in enumerate(disks):
1333       env["INSTANCE_DISK%d_SIZE" % idx] = size
1334       env["INSTANCE_DISK%d_MODE" % idx] = mode
1335   else:
1336     disk_count = 0
1337
1338   env["INSTANCE_DISK_COUNT"] = disk_count
1339
1340   if not tags:
1341     tags = []
1342
1343   env["INSTANCE_TAGS"] = " ".join(tags)
1344
1345   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346     for key, value in source.items():
1347       env["INSTANCE_%s_%s" % (kind, key)] = value
1348
1349   return env
1350
1351
1352 def _NICListToTuple(lu, nics):
1353   """Build a list of nic information tuples.
1354
1355   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356   value in LUInstanceQueryData.
1357
1358   @type lu:  L{LogicalUnit}
1359   @param lu: the logical unit on whose behalf we execute
1360   @type nics: list of L{objects.NIC}
1361   @param nics: list of nics to convert to hooks tuples
1362
1363   """
1364   hooks_nics = []
1365   cluster = lu.cfg.GetClusterInfo()
1366   for nic in nics:
1367     ip = nic.ip
1368     mac = nic.mac
1369     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370     mode = filled_params[constants.NIC_MODE]
1371     link = filled_params[constants.NIC_LINK]
1372     hooks_nics.append((ip, mac, mode, link))
1373   return hooks_nics
1374
1375
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds the instance related hook environment from an instance object.

  The values are extracted from the instance (with backend and hypervisor
  parameters filled by the cluster object) and then handed over to
  L{_BuildInstanceHookEnv}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values; keys are argument names of L{_BuildInstanceHookEnv}
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1414
1415
1416 def _AdjustCandidatePool(lu, exceptions):
1417   """Adjust the candidate pool after node operations.
1418
1419   """
1420   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1421   if mod_list:
1422     lu.LogInfo("Promoted nodes to master candidate role: %s",
1423                utils.CommaJoin(node.name for node in mod_list))
1424     for name in mod_list:
1425       lu.context.ReaddNode(name)
1426   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1427   if mc_now > mc_max:
1428     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1429                (mc_now, mc_max))
1430
1431
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433   """Decide whether I should promote myself as a master candidate.
1434
1435   """
1436   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438   # the new node will increase mc_max with one, so:
1439   mc_should = min(mc_should + 1, cp_size)
1440   return mc_now < mc_should
1441
1442
1443 def _CalculateGroupIPolicy(cluster, group):
1444   """Calculate instance policy for group.
1445
1446   """
1447   return cluster.SimpleFillIPolicy(group.ipolicy)
1448
1449
1450 def _ComputeViolatingInstances(ipolicy, instances):
1451   """Computes a set of instances who violates given ipolicy.
1452
1453   @param ipolicy: The ipolicy to verify
1454   @type instances: object.Instance
1455   @param instances: List of instances to verify
1456   @return: A frozenset of instance names violating the ipolicy
1457
1458   """
1459   return frozenset([inst.name for inst in instances
1460                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1461
1462
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464   """Check that the brigdes needed by a list of nics exist.
1465
1466   """
1467   cluster = lu.cfg.GetClusterInfo()
1468   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469   brlist = [params[constants.NIC_LINK] for params in paramslist
1470             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1471   if brlist:
1472     result = lu.rpc.call_bridges_exist(target_node, brlist)
1473     result.Raise("Error checking bridges on destination node '%s'" %
1474                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1475
1476
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Checks that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1484
1485
def _CheckOSVariant(os_obj, name):
  """Checks an OS name against the variants the OS supports.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      variants, if the variant is missing for an OS with variants, or if
      the variant is not among the supported ones

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1508
1509
1510 def _GetNodeInstancesInner(cfg, fn):
1511   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1512
1513
def _GetNodeInstances(cfg, node_name):
  """Returns all instances using a node, be it as primary or secondary.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for
  @return: list of instances having the node among their C{all_nodes}

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1520
1521
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns the instances having a node as their primary node.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for
  @return: list of instances whose C{primary_node} equals the node name

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1528
1529
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns the instances having a node as one of their secondaries.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for
  @return: list of instances having the node in their C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1536
1537
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the extra arguments needed for a storage type.

  @param cfg: the configuration object
  @param storage_type: the storage type to return arguments for
  @rtype: list
  @return: for file storage a list holding one list with the file and the
      shared file storage directory, otherwise an empty list

  """
  args = []
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
    args.append(storage_dirs)
  return args
1548
1549
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Finds the disks of an instance which a node reports as faulty.

  The disk IDs are first set for the given node, then the node is asked
  for the mirror status of all disks of the instance.

  @param cfg: the configuration object
  @param rpc_runner: the RPC runner used to contact the node
  @param instance: the instance whose disks are checked
  @param node_name: the name of the node to query
  @param prereq: passed to the RPC result's C{Raise}, selecting the kind
      of error raised if the call failed
  @rtype: list
  @return: indices of the disks whose local disk status is
      L{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for dev in disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Empty status entries are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1565
1566
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568   """Check the sanity of iallocator and node arguments and use the
1569   cluster-wide iallocator if appropriate.
1570
1571   Check that at most one of (iallocator, node) is specified. If none is
1572   specified, then the LU's opcode's iallocator slot is filled with the
1573   cluster-wide default iallocator.
1574
1575   @type iallocator_slot: string
1576   @param iallocator_slot: the name of the opcode iallocator slot
1577   @type node_slot: string
1578   @param node_slot: the name of the opcode target node slot
1579
1580   """
1581   node = getattr(lu.op, node_slot, None)
1582   iallocator = getattr(lu.op, iallocator_slot, None)
1583
1584   if node is not None and iallocator is not None:
1585     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1586                                errors.ECODE_INVAL)
1587   elif node is None and iallocator is None:
1588     default_iallocator = lu.cfg.GetDefaultIAllocator()
1589     if default_iallocator:
1590       setattr(lu.op, iallocator_slot, default_iallocator)
1591     else:
1592       raise errors.OpPrereqError("No iallocator or node given and no"
1593                                  " cluster-wide default iallocator found;"
1594                                  " please specify either an iallocator or a"
1595                                  " node, or set a cluster-wide default"
1596                                  " iallocator")
1597
1598
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600   """Decides on which iallocator to use.
1601
1602   @type cfg: L{config.ConfigWriter}
1603   @param cfg: Cluster configuration object
1604   @type iallocator: string or None
1605   @param iallocator: Iallocator specified in opcode
1606   @rtype: string
1607   @return: Iallocator name
1608
1609   """
1610   if not iallocator:
1611     # Use default iallocator
1612     iallocator = cfg.GetDefaultIAllocator()
1613
1614   if not iallocator:
1615     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616                                " opcode nor as a cluster-wide default",
1617                                errors.ECODE_INVAL)
1618
1619   return iallocator
1620
1621
class LUClusterPostInit(LogicalUnit):
  """Logical unit whose only purpose is running the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: environment holding only C{OP_TARGET}, set to the cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Builds the node lists for the hooks.

    @rtype: tuple
    @return: an empty list and a list holding only the master node
        (presumably the nodes for the pre and the post phase, to be
        confirmed against L{LogicalUnit})

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Does nothing and reports success.

    @param feedback_fn: unused

    """
    return True
1648
1649
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @rtype: dict
    @return: environment holding only C{OP_TARGET}, set to the cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Builds the node lists for the hooks.

    @rtype: tuple
    @return: two empty lists; the post hook is instead run explicitly on
        the master node from L{Exec}

    """
    return ([], [])

  def CheckPrereq(self):
    """Checks that the cluster is empty.

    The master must be the only node left and no instance may exist.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = len(nodelist) == 1 and nodelist[0] == master
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and asks it to deactivate the
    master IP address; a failure of the latter is only logged.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_name
1709
1710
1711 def _VerifyCertificate(filename):
1712   """Verifies a certificate for L{LUClusterVerifyConfig}.
1713
1714   @type filename: string
1715   @param filename: Path to PEM file
1716
1717   """
1718   try:
1719     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1720                                            utils.ReadFile(filename))
1721   except Exception, err: # pylint: disable=W0703
1722     return (LUClusterVerifyConfig.ETYPE_ERROR,
1723             "Failed to load X509 certificate %s: %s" % (filename, err))
1724
1725   (errcode, msg) = \
1726     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1727                                 constants.SSL_CERT_EXPIRATION_ERROR)
1728
1729   if msg:
1730     fnamemsg = "While verifying %s: %s" % (filename, msg)
1731   else:
1732     fnamemsg = None
1733
1734   if errcode is None:
1735     return (None, fnamemsg)
1736   elif errcode == utils.CERT_WARNING:
1737     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1738   elif errcode == utils.CERT_ERROR:
1739     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1740
1741   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1742
1743
1744 def _GetAllHypervisorParameters(cluster, instances):
1745   """Compute the set of all hypervisor parameters.
1746
1747   @type cluster: L{objects.Cluster}
1748   @param cluster: the cluster object
1749   @param instances: list of L{objects.Instance}
1750   @param instances: additional instances from which to obtain parameters
1751   @rtype: list of (origin, hypervisor, parameters)
1752   @return: a list with all parameters found, indicating the hypervisor they
1753        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1754
1755   """
1756   hvp_data = []
1757
1758   for hv_name in cluster.enabled_hypervisors:
1759     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1760
1761   for os_name, os_hvp in cluster.os_hvp.items():
1762     for hv_name, hv_params in os_hvp.items():
1763       if hv_params:
1764         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1765         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1766
1767   # TODO: collapse identical parameter values in a single one
1768   for instance in instances:
1769     if instance.hvparams:
1770       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1771                        cluster.FillHV(instance)))
1772
1773   return hvp_data
1774
1775
1776 class _VerifyErrors(object):
1777   """Mix-in for cluster/group verify LUs.
1778
1779   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1780   self.op and self._feedback_fn to be available.)
1781
1782   """
1783
1784   ETYPE_FIELD = "code"
1785   ETYPE_ERROR = "ERROR"
1786   ETYPE_WARNING = "WARNING"
1787
1788   def _Error(self, ecode, item, msg, *args, **kwargs):
1789     """Format an error message.
1790
1791     Based on the opcode's error_codes parameter, either format a
1792     parseable error code, or a simpler error string.
1793
1794     This must be called only from Exec and functions called from Exec.
1795
1796     """
1797     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1798     itype, etxt, _ = ecode
1799     # first complete the msg
1800     if args:
1801       msg = msg % args
1802     # then format the whole message
1803     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1804       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1805     else:
1806       if item:
1807         item = " " + item
1808       else:
1809         item = ""
1810       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1811     # and finally report it via the feedback_fn
1812     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1813
1814   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1815     """Log an error message if the passed condition is True.
1816
1817     """
1818     cond = (bool(cond)
1819             or self.op.debug_simulate_errors) # pylint: disable=E1101
1820
1821     # If the error code is in the list of ignored errors, demote the error to a
1822     # warning
1823     (_, etxt, _) = ecode
1824     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1825       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1826
1827     if cond:
1828       self._Error(ecode, *args, **kwargs)
1829
1830     # do not mark the operation as failed for WARN cases only
1831     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1832       self.bad = self.bad or cond
1833
1834
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests no locks at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds one job per verification step.

    If the opcode names a group, only that group is verified. Otherwise a
    job verifying the global configuration is created first, followed by
    one job per node group, each of them depending on the first job.

    @return: L{ResultWithJobs} wrapping the list of jobs

    """
    jobs = []

    single_group = self.op.group_name
    if single_group:
      groups = [single_group]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      config_op = \
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
      jobs.append([config_op])

    for group in groups:
      if single_group:
        depends = None
      else:
        # Always depend on global verification; len(jobs) is the index the
        # job built below will get, hence the negative offset always points
        # back at the first job in the list
        depends = [(-len(jobs), [])]

      group_op = opcodes.OpClusterVerifyGroup(
        group_name=group, ignore_errors=self.op.ignore_errors,
        depends=depends)
      jobs.append([group_op])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # only opcodes other than the group verification may reject it
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1877
1878
1879 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1880   """Verifies the cluster config.
1881
1882   """
1883   REQ_BGL = True
1884
1885   def _VerifyHVP(self, hvp_data):
1886     """Verifies locally the syntax of the hypervisor parameters.
1887
1888     """
1889     for item, hv_name, hv_params in hvp_data:
1890       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1891              (item, hv_name))
1892       try:
1893         hv_class = hypervisor.GetHypervisor(hv_name)
1894         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1895         hv_class.CheckParameterSyntax(hv_params)
1896       except errors.GenericError, err:
1897         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1898
1899   def ExpandNames(self):
1900     # Information can be safely retrieved as the BGL is acquired in exclusive
1901     # mode
1902     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1903     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1904     self.all_node_info = self.cfg.GetAllNodesInfo()
1905     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1906     self.needed_locks = {}
1907
1908   def Exec(self, feedback_fn):
1909     """Verify integrity of cluster, performing various test on nodes.
1910
1911     """
1912     self.bad = False
1913     self._feedback_fn = feedback_fn
1914
1915     feedback_fn("* Verifying cluster config")
1916
1917     for msg in self.cfg.VerifyConfig():
1918       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1919
1920     feedback_fn("* Verifying cluster certificate files")
1921
1922     for cert_filename in constants.ALL_CERT_FILES:
1923       (errcode, msg) = _VerifyCertificate(cert_filename)
1924       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1925
1926     feedback_fn("* Verifying hypervisor parameters")
1927
1928     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1929                                                 self.all_inst_info.values()))
1930
1931     feedback_fn("* Verifying all nodes belong to an existing group")
1932
1933     # We do this verification here because, should this bogus circumstance
1934     # occur, it would never be caught by VerifyGroup, which only acts on
1935     # nodes/instances reachable from existing node groups.
1936
1937     dangling_nodes = set(node.name for node in self.all_node_info.values()
1938                          if node.group not in self.all_group_info)
1939
1940     dangling_instances = {}
1941     no_node_instances = []
1942
1943     for inst in self.all_inst_info.values():
1944       if inst.primary_node in dangling_nodes:
1945         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1946       elif inst.primary_node not in self.all_node_info:
1947         no_node_instances.append(inst.name)
1948
1949     pretty_dangling = [
1950         "%s (%s)" %
1951         (node.name,
1952          utils.CommaJoin(dangling_instances.get(node.name,
1953                                                 ["no instances"])))
1954         for node in dangling_nodes]
1955
1956     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1957                   None,
1958                   "the following nodes (and their instances) belong to a non"
1959                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1960
1961     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1962                   None,
1963                   "the following instances have a non-existing primary-node:"
1964                   " %s", utils.CommaJoin(no_node_instances))
1965
1966     return not self.bad
1967
1968
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970   """Verifies the status of a node group.
1971
1972   """
1973   HPATH = "cluster-verify"
1974   HTYPE = constants.HTYPE_CLUSTER
1975   REQ_BGL = False
1976
1977   _HOOKS_INDENT_RE = re.compile("^", re.M)
1978
1979   class NodeImage(object):
1980     """A class representing the logical and physical status of a node.
1981
1982     @type name: string
1983     @ivar name: the node name to which this object refers
1984     @ivar volumes: a structure as returned from
1985         L{ganeti.backend.GetVolumeList} (runtime)
1986     @ivar instances: a list of running instances (runtime)
1987     @ivar pinst: list of configured primary instances (config)
1988     @ivar sinst: list of configured secondary instances (config)
1989     @ivar sbp: dictionary of {primary-node: list of instances} for all
1990         instances for which this node is secondary (config)
1991     @ivar mfree: free memory, as reported by hypervisor (runtime)
1992     @ivar dfree: free disk, as reported by the node (runtime)
1993     @ivar offline: the offline status (config)
1994     @type rpc_fail: boolean
1995     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1996         not whether the individual keys were correct) (runtime)
1997     @type lvm_fail: boolean
1998     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999     @type hyp_fail: boolean
2000     @ivar hyp_fail: whether the RPC call didn't return the instance list
2001     @type ghost: boolean
2002     @ivar ghost: whether this is a known node or not (config)
2003     @type os_fail: boolean
2004     @ivar os_fail: whether the RPC call didn't return valid OS data
2005     @type oslist: list
2006     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007     @type vm_capable: boolean
2008     @ivar vm_capable: whether the node can host instances
2009
2010     """
2011     def __init__(self, offline=False, name=None, vm_capable=True):
2012       self.name = name
2013       self.volumes = {}
2014       self.instances = []
2015       self.pinst = []
2016       self.sinst = []
2017       self.sbp = {}
2018       self.mfree = 0
2019       self.dfree = 0
2020       self.offline = offline
2021       self.vm_capable = vm_capable
2022       self.rpc_fail = False
2023       self.lvm_fail = False
2024       self.hyp_fail = False
2025       self.ghost = False
2026       self.os_fail = False
2027       self.oslist = {}
2028
2029   def ExpandNames(self):
2030     # This raises errors.OpPrereqError on its own:
2031     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2032
2033     # Get instances in node group; this is unsafe and needs verification later
2034     inst_names = \
2035       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2036
2037     self.needed_locks = {
2038       locking.LEVEL_INSTANCE: inst_names,
2039       locking.LEVEL_NODEGROUP: [self.group_uuid],
2040       locking.LEVEL_NODE: [],
2041       }
2042
2043     self.share_locks = _ShareAll()
2044
2045   def DeclareLocks(self, level):
2046     if level == locking.LEVEL_NODE:
2047       # Get members of node group; this is unsafe and needs verification later
2048       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2049
2050       all_inst_info = self.cfg.GetAllInstancesInfo()
2051
2052       # In Exec(), we warn about mirrored instances that have primary and
2053       # secondary living in separate node groups. To fully verify that
2054       # volumes for these instances are healthy, we will need to do an
2055       # extra call to their secondaries. We ensure here those nodes will
2056       # be locked.
2057       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2058         # Important: access only the instances whose lock is owned
2059         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2060           nodes.update(all_inst_info[inst].secondary_nodes)
2061
2062       self.needed_locks[locking.LEVEL_NODE] = nodes
2063
2064   def CheckPrereq(self):
2065     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2066     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2067
2068     group_nodes = set(self.group_info.members)
2069     group_instances = \
2070       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2071
2072     unlocked_nodes = \
2073         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2074
2075     unlocked_instances = \
2076         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2077
2078     if unlocked_nodes:
2079       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2080                                  utils.CommaJoin(unlocked_nodes),
2081                                  errors.ECODE_STATE)
2082
2083     if unlocked_instances:
2084       raise errors.OpPrereqError("Missing lock for instances: %s" %
2085                                  utils.CommaJoin(unlocked_instances),
2086                                  errors.ECODE_STATE)
2087
2088     self.all_node_info = self.cfg.GetAllNodesInfo()
2089     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2090
2091     self.my_node_names = utils.NiceSort(group_nodes)
2092     self.my_inst_names = utils.NiceSort(group_instances)
2093
2094     self.my_node_info = dict((name, self.all_node_info[name])
2095                              for name in self.my_node_names)
2096
2097     self.my_inst_info = dict((name, self.all_inst_info[name])
2098                              for name in self.my_inst_names)
2099
2100     # We detect here the nodes that will need the extra RPC calls for verifying
2101     # split LV volumes; they should be locked.
2102     extra_lv_nodes = set()
2103
2104     for inst in self.my_inst_info.values():
2105       if inst.disk_template in constants.DTS_INT_MIRROR:
2106         for nname in inst.all_nodes:
2107           if self.all_node_info[nname].group != self.group_uuid:
2108             extra_lv_nodes.add(nname)
2109
2110     unlocked_lv_nodes = \
2111         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2112
2113     if unlocked_lv_nodes:
2114       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2115                                  utils.CommaJoin(unlocked_lv_nodes),
2116                                  errors.ECODE_STATE)
2117     self.extra_lv_nodes = list(extra_lv_nodes)
2118
2119   def _VerifyNode(self, ninfo, nresult):
2120     """Perform some basic validation on data returned from a node.
2121
2122       - check the result data structure is well formed and has all the
2123         mandatory fields
2124       - check ganeti version
2125
2126     @type ninfo: L{objects.Node}
2127     @param ninfo: the node to check
2128     @param nresult: the results from the node
2129     @rtype: boolean
2130     @return: whether overall this call was successful (and we can expect
2131          reasonable values in the respose)
2132
2133     """
2134     node = ninfo.name
2135     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2136
2137     # main result, nresult should be a non-empty dict
2138     test = not nresult or not isinstance(nresult, dict)
2139     _ErrorIf(test, constants.CV_ENODERPC, node,
2140                   "unable to verify node: no data returned")
2141     if test:
2142       return False
2143
2144     # compares ganeti version
2145     local_version = constants.PROTOCOL_VERSION
2146     remote_version = nresult.get("version", None)
2147     test = not (remote_version and
2148                 isinstance(remote_version, (list, tuple)) and
2149                 len(remote_version) == 2)
2150     _ErrorIf(test, constants.CV_ENODERPC, node,
2151              "connection to node returned invalid data")
2152     if test:
2153       return False
2154
2155     test = local_version != remote_version[0]
2156     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2157              "incompatible protocol versions: master %s,"
2158              " node %s", local_version, remote_version[0])
2159     if test:
2160       return False
2161
2162     # node seems compatible, we can actually try to look into its results
2163
2164     # full package version
2165     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2166                   constants.CV_ENODEVERSION, node,
2167                   "software version mismatch: master %s, node %s",
2168                   constants.RELEASE_VERSION, remote_version[1],
2169                   code=self.ETYPE_WARNING)
2170
2171     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2172     if ninfo.vm_capable and isinstance(hyp_result, dict):
2173       for hv_name, hv_result in hyp_result.iteritems():
2174         test = hv_result is not None
2175         _ErrorIf(test, constants.CV_ENODEHV, node,
2176                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2177
2178     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2179     if ninfo.vm_capable and isinstance(hvp_result, list):
2180       for item, hv_name, hv_result in hvp_result:
2181         _ErrorIf(True, constants.CV_ENODEHV, node,
2182                  "hypervisor %s parameter verify failure (source %s): %s",
2183                  hv_name, item, hv_result)
2184
2185     test = nresult.get(constants.NV_NODESETUP,
2186                        ["Missing NODESETUP results"])
2187     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2188              "; ".join(test))
2189
2190     return True
2191
2192   def _VerifyNodeTime(self, ninfo, nresult,
2193                       nvinfo_starttime, nvinfo_endtime):
2194     """Check the node time.
2195
2196     @type ninfo: L{objects.Node}
2197     @param ninfo: the node to check
2198     @param nresult: the remote results for the node
2199     @param nvinfo_starttime: the start time of the RPC call
2200     @param nvinfo_endtime: the end time of the RPC call
2201
2202     """
2203     node = ninfo.name
2204     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2205
2206     ntime = nresult.get(constants.NV_TIME, None)
2207     try:
2208       ntime_merged = utils.MergeTime(ntime)
2209     except (ValueError, TypeError):
2210       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2211       return
2212
2213     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2214       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2215     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2216       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2217     else:
2218       ntime_diff = None
2219
2220     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2221              "Node time diverges by at least %s from master node time",
2222              ntime_diff)
2223
2224   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2225     """Check the node LVM results.
2226
2227     @type ninfo: L{objects.Node}
2228     @param ninfo: the node to check
2229     @param nresult: the remote results for the node
2230     @param vg_name: the configured VG name
2231
2232     """
2233     if vg_name is None:
2234       return
2235
2236     node = ninfo.name
2237     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2238
2239     # checks vg existence and size > 20G
2240     vglist = nresult.get(constants.NV_VGLIST, None)
2241     test = not vglist
2242     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2243     if not test:
2244       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2245                                             constants.MIN_VG_SIZE)
2246       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2247
2248     # check pv names
2249     pvlist = nresult.get(constants.NV_PVLIST, None)
2250     test = pvlist is None
2251     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2252     if not test:
2253       # check that ':' is not present in PV names, since it's a
2254       # special character for lvcreate (denotes the range of PEs to
2255       # use on the PV)
2256       for _, pvname, owner_vg in pvlist:
2257         test = ":" in pvname
2258         _ErrorIf(test, constants.CV_ENODELVM, node,
2259                  "Invalid character ':' in PV '%s' of VG '%s'",
2260                  pvname, owner_vg)
2261
2262   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2263     """Check the node bridges.
2264
2265     @type ninfo: L{objects.Node}
2266     @param ninfo: the node to check
2267     @param nresult: the remote results for the node
2268     @param bridges: the expected list of bridges
2269
2270     """
2271     if not bridges:
2272       return
2273
2274     node = ninfo.name
2275     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2276
2277     missing = nresult.get(constants.NV_BRIDGES, None)
2278     test = not isinstance(missing, list)
2279     _ErrorIf(test, constants.CV_ENODENET, node,
2280              "did not return valid bridge information")
2281     if not test:
2282       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2283                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2284
2285   def _VerifyNodeUserScripts(self, ninfo, nresult):
2286     """Check the results of user scripts presence and executability on the node
2287
2288     @type ninfo: L{objects.Node}
2289     @param ninfo: the node to check
2290     @param nresult: the remote results for the node
2291
2292     """
2293     node = ninfo.name
2294
2295     test = not constants.NV_USERSCRIPTS in nresult
2296     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2297                   "did not return user scripts information")
2298
2299     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2300     if not test:
2301       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2302                     "user scripts not present or not executable: %s" %
2303                     utils.CommaJoin(sorted(broken_scripts)))
2304
2305   def _VerifyNodeNetwork(self, ninfo, nresult):
2306     """Check the node network connectivity results.
2307
2308     @type ninfo: L{objects.Node}
2309     @param ninfo: the node to check
2310     @param nresult: the remote results for the node
2311
2312     """
2313     node = ninfo.name
2314     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2315
2316     test = constants.NV_NODELIST not in nresult
2317     _ErrorIf(test, constants.CV_ENODESSH, node,
2318              "node hasn't returned node ssh connectivity data")
2319     if not test:
2320       if nresult[constants.NV_NODELIST]:
2321         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2322           _ErrorIf(True, constants.CV_ENODESSH, node,
2323                    "ssh communication with node '%s': %s", a_node, a_msg)
2324
2325     test = constants.NV_NODENETTEST not in nresult
2326     _ErrorIf(test, constants.CV_ENODENET, node,
2327              "node hasn't returned node tcp connectivity data")
2328     if not test:
2329       if nresult[constants.NV_NODENETTEST]:
2330         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2331         for anode in nlist:
2332           _ErrorIf(True, constants.CV_ENODENET, node,
2333                    "tcp communication with node '%s': %s",
2334                    anode, nresult[constants.NV_NODENETTEST][anode])
2335
2336     test = constants.NV_MASTERIP not in nresult
2337     _ErrorIf(test, constants.CV_ENODENET, node,
2338              "node hasn't returned node master IP reachability data")
2339     if not test:
2340       if not nresult[constants.NV_MASTERIP]:
2341         if node == self.master_node:
2342           msg = "the master node cannot reach the master IP (not configured?)"
2343         else:
2344           msg = "cannot reach the master IP"
2345         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2346
2347   def _VerifyInstance(self, instance, instanceconfig, node_image,
2348                       diskstatus):
2349     """Verify an instance.
2350
2351     This function checks to see if the required block devices are
2352     available on the instance's node.
2353
2354     """
2355     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2356     node_current = instanceconfig.primary_node
2357
2358     node_vol_should = {}
2359     instanceconfig.MapLVsByNode(node_vol_should)
2360
2361     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2362     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2363     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2364
2365     for node in node_vol_should:
2366       n_img = node_image[node]
2367       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2368         # ignore missing volumes on offline or broken nodes
2369         continue
2370       for volume in node_vol_should[node]:
2371         test = volume not in n_img.volumes
2372         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2373                  "volume %s missing on node %s", volume, node)
2374
2375     if instanceconfig.admin_state == constants.ADMINST_UP:
2376       pri_img = node_image[node_current]
2377       test = instance not in pri_img.instances and not pri_img.offline
2378       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2379                "instance not running on its primary node %s",
2380                node_current)
2381
2382     diskdata = [(nname, success, status, idx)
2383                 for (nname, disks) in diskstatus.items()
2384                 for idx, (success, status) in enumerate(disks)]
2385
2386     for nname, success, bdev_status, idx in diskdata:
2387       # the 'ghost node' construction in Exec() ensures that we have a
2388       # node here
2389       snode = node_image[nname]
2390       bad_snode = snode.ghost or snode.offline
2391       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2392                not success and not bad_snode,
2393                constants.CV_EINSTANCEFAULTYDISK, instance,
2394                "couldn't retrieve status for disk/%s on %s: %s",
2395                idx, nname, bdev_status)
2396       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2397                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2398                constants.CV_EINSTANCEFAULTYDISK, instance,
2399                "disk/%s on %s is faulty", idx, nname)
2400
2401   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2402     """Verify if there are any unknown volumes in the cluster.
2403
2404     The .os, .swap and backup volumes are ignored. All other volumes are
2405     reported as unknown.
2406
2407     @type reserved: L{ganeti.utils.FieldSet}
2408     @param reserved: a FieldSet of reserved volume names
2409
2410     """
2411     for node, n_img in node_image.items():
2412       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2413           self.all_node_info[node].group != self.group_uuid):
2414         # skip non-healthy nodes
2415         continue
2416       for volume in n_img.volumes:
2417         test = ((node not in node_vol_should or
2418                 volume not in node_vol_should[node]) and
2419                 not reserved.Matches(volume))
2420         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2421                       "volume %s is unknown", volume)
2422
2423   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2424     """Verify N+1 Memory Resilience.
2425
2426     Check that if one single node dies we can still start all the
2427     instances it was primary for.
2428
2429     """
2430     cluster_info = self.cfg.GetClusterInfo()
2431     for node, n_img in node_image.items():
2432       # This code checks that every node which is now listed as
2433       # secondary has enough memory to host all instances it is
2434       # supposed to should a single other node in the cluster fail.
2435       # FIXME: not ready for failover to an arbitrary node
2436       # FIXME: does not support file-backed instances
2437       # WARNING: we currently take into account down instances as well
2438       # as up ones, considering that even if they're down someone
2439       # might want to start them even in the event of a node failure.
2440       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2441         # we're skipping nodes marked offline and nodes in other groups from
2442         # the N+1 warning, since most likely we don't have good memory
2443         # infromation from them; we already list instances living on such
2444         # nodes, and that's enough warning
2445         continue
2446       #TODO(dynmem): also consider ballooning out other instances
2447       for prinode, instances in n_img.sbp.items():
2448         needed_mem = 0
2449         for instance in instances:
2450           bep = cluster_info.FillBE(instance_cfg[instance])
2451           if bep[constants.BE_AUTO_BALANCE]:
2452             needed_mem += bep[constants.BE_MINMEM]
2453         test = n_img.mfree < needed_mem
2454         self._ErrorIf(test, constants.CV_ENODEN1, node,
2455                       "not enough memory to accomodate instance failovers"
2456                       " should node %s fail (%dMiB needed, %dMiB available)",
2457                       prinode, needed_mem, n_img.mfree)
2458
2459   @classmethod
2460   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2461                    (files_all, files_opt, files_mc, files_vm)):
2462     """Verifies file checksums collected from all nodes.
2463
2464     @param errorif: Callback for reporting errors
2465     @param nodeinfo: List of L{objects.Node} objects
2466     @param master_node: Name of master node
2467     @param all_nvinfo: RPC results
2468
2469     """
2470     # Define functions determining which nodes to consider for a file
2471     files2nodefn = [
2472       (files_all, None),
2473       (files_mc, lambda node: (node.master_candidate or
2474                                node.name == master_node)),
2475       (files_vm, lambda node: node.vm_capable),
2476       ]
2477
2478     # Build mapping from filename to list of nodes which should have the file
2479     nodefiles = {}
2480     for (files, fn) in files2nodefn:
2481       if fn is None:
2482         filenodes = nodeinfo
2483       else:
2484         filenodes = filter(fn, nodeinfo)
2485       nodefiles.update((filename,
2486                         frozenset(map(operator.attrgetter("name"), filenodes)))
2487                        for filename in files)
2488
2489     assert set(nodefiles) == (files_all | files_mc | files_vm)
2490
2491     fileinfo = dict((filename, {}) for filename in nodefiles)
2492     ignore_nodes = set()
2493
2494     for node in nodeinfo:
2495       if node.offline:
2496         ignore_nodes.add(node.name)
2497         continue
2498
2499       nresult = all_nvinfo[node.name]
2500
2501       if nresult.fail_msg or not nresult.payload:
2502         node_files = None
2503       else:
2504         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2505
2506       test = not (node_files and isinstance(node_files, dict))
2507       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2508               "Node did not return file checksum data")
2509       if test:
2510         ignore_nodes.add(node.name)
2511         continue
2512
2513       # Build per-checksum mapping from filename to nodes having it
2514       for (filename, checksum) in node_files.items():
2515         assert filename in nodefiles
2516         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2517
2518     for (filename, checksums) in fileinfo.items():
2519       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2520
2521       # Nodes having the file
2522       with_file = frozenset(node_name
2523                             for nodes in fileinfo[filename].values()
2524                             for node_name in nodes) - ignore_nodes
2525
2526       expected_nodes = nodefiles[filename] - ignore_nodes
2527
2528       # Nodes missing file
2529       missing_file = expected_nodes - with_file
2530
2531       if filename in files_opt:
2532         # All or no nodes
2533         errorif(missing_file and missing_file != expected_nodes,
2534                 constants.CV_ECLUSTERFILECHECK, None,
2535                 "File %s is optional, but it must exist on all or no"
2536                 " nodes (not found on %s)",
2537                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2538       else:
2539         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2540                 "File %s is missing from node(s) %s", filename,
2541                 utils.CommaJoin(utils.NiceSort(missing_file)))
2542
2543         # Warn if a node has a file it shouldn't
2544         unexpected = with_file - expected_nodes
2545         errorif(unexpected,
2546                 constants.CV_ECLUSTERFILECHECK, None,
2547                 "File %s should not exist on node(s) %s",
2548                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2549
2550       # See if there are multiple versions of the file
2551       test = len(checksums) > 1
2552       if test:
2553         variants = ["variant %s on %s" %
2554                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2555                     for (idx, (checksum, nodes)) in
2556                       enumerate(sorted(checksums.items()))]
2557       else:
2558         variants = []
2559
2560       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2561               "File %s found with %s different checksums (%s)",
2562               filename, len(checksums), "; ".join(variants))
2563
2564   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2565                       drbd_map):
2566     """Verifies and the node DRBD status.
2567
2568     @type ninfo: L{objects.Node}
2569     @param ninfo: the node to check
2570     @param nresult: the remote results for the node
2571     @param instanceinfo: the dict of instances
2572     @param drbd_helper: the configured DRBD usermode helper
2573     @param drbd_map: the DRBD map as returned by
2574         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2575
2576     """
2577     node = ninfo.name
2578     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579
2580     if drbd_helper:
2581       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2582       test = (helper_result == None)
2583       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584                "no drbd usermode helper returned")
2585       if helper_result:
2586         status, payload = helper_result
2587         test = not status
2588         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589                  "drbd usermode helper check unsuccessful: %s", payload)
2590         test = status and (payload != drbd_helper)
2591         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2592                  "wrong drbd usermode helper: %s", payload)
2593
2594     # compute the DRBD minors
2595     node_drbd = {}
2596     for minor, instance in drbd_map[node].items():
2597       test = instance not in instanceinfo
2598       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2599                "ghost instance '%s' in temporary DRBD map", instance)
2600         # ghost instance should not be running, but otherwise we
2601         # don't give double warnings (both ghost instance and
2602         # unallocated minor in use)
2603       if test:
2604         node_drbd[minor] = (instance, False)
2605       else:
2606         instance = instanceinfo[instance]
2607         node_drbd[minor] = (instance.name,
2608                             instance.admin_state == constants.ADMINST_UP)
2609
2610     # and now check them
2611     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2612     test = not isinstance(used_minors, (tuple, list))
2613     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2614              "cannot parse drbd status file: %s", str(used_minors))
2615     if test:
2616       # we cannot check drbd status
2617       return
2618
2619     for minor, (iname, must_exist) in node_drbd.items():
2620       test = minor not in used_minors and must_exist
2621       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2622                "drbd minor %d of instance %s is not active", minor, iname)
2623     for minor in used_minors:
2624       test = minor not in node_drbd
2625       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2626                "unallocated drbd minor %d is in use", minor)
2627
2628   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2629     """Builds the node OS structures.
2630
2631     @type ninfo: L{objects.Node}
2632     @param ninfo: the node to check
2633     @param nresult: the remote results for the node
2634     @param nimg: the node image object
2635
2636     """
2637     node = ninfo.name
2638     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2639
2640     remote_os = nresult.get(constants.NV_OSLIST, None)
2641     test = (not isinstance(remote_os, list) or
2642             not compat.all(isinstance(v, list) and len(v) == 7
2643                            for v in remote_os))
2644
2645     _ErrorIf(test, constants.CV_ENODEOS, node,
2646              "node hasn't returned valid OS data")
2647
2648     nimg.os_fail = test
2649
2650     if test:
2651       return
2652
2653     os_dict = {}
2654
2655     for (name, os_path, status, diagnose,
2656          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2657
2658       if name not in os_dict:
2659         os_dict[name] = []
2660
2661       # parameters is a list of lists instead of list of tuples due to
2662       # JSON lacking a real tuple type, fix it:
2663       parameters = [tuple(v) for v in parameters]
2664       os_dict[name].append((os_path, status, diagnose,
2665                             set(variants), set(parameters), set(api_ver)))
2666
2667     nimg.oslist = os_dict
2668
2669   def _VerifyNodeOS(self, ninfo, nimg, base):
2670     """Verifies the node OS list.
2671
2672     @type ninfo: L{objects.Node}
2673     @param ninfo: the node to check
2674     @param nimg: the node image object
2675     @param base: the 'template' node we match against (e.g. from the master)
2676
2677     """
2678     node = ninfo.name
2679     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2680
2681     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2682
2683     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2684     for os_name, os_data in nimg.oslist.items():
2685       assert os_data, "Empty OS status for OS %s?!" % os_name
2686       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2687       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2688                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2689       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2690                "OS '%s' has multiple entries (first one shadows the rest): %s",
2691                os_name, utils.CommaJoin([v[0] for v in os_data]))
2692       # comparisons with the 'base' image
2693       test = os_name not in base.oslist
2694       _ErrorIf(test, constants.CV_ENODEOS, node,
2695                "Extra OS %s not present on reference node (%s)",
2696                os_name, base.name)
2697       if test:
2698         continue
2699       assert base.oslist[os_name], "Base node has empty OS status?"
2700       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2701       if not b_status:
2702         # base OS is invalid, skipping
2703         continue
2704       for kind, a, b in [("API version", f_api, b_api),
2705                          ("variants list", f_var, b_var),
2706                          ("parameters", beautify_params(f_param),
2707                           beautify_params(b_param))]:
2708         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2709                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2710                  kind, os_name, base.name,
2711                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2712
2713     # check any missing OSes
2714     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2715     _ErrorIf(missing, constants.CV_ENODEOS, node,
2716              "OSes present on reference node %s but missing on this node: %s",
2717              base.name, utils.CommaJoin(missing))
2718
2719   def _VerifyOob(self, ninfo, nresult):
2720     """Verifies out of band functionality of a node.
2721
2722     @type ninfo: L{objects.Node}
2723     @param ninfo: the node to check
2724     @param nresult: the remote results for the node
2725
2726     """
2727     node = ninfo.name
2728     # We just have to verify the paths on master and/or master candidates
2729     # as the oob helper is invoked on the master
2730     if ((ninfo.master_candidate or ninfo.master_capable) and
2731         constants.NV_OOB_PATHS in nresult):
2732       for path_result in nresult[constants.NV_OOB_PATHS]:
2733         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2734
2735   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2736     """Verifies and updates the node volume data.
2737
2738     This function will update a L{NodeImage}'s internal structures
2739     with data from the remote call.
2740
2741     @type ninfo: L{objects.Node}
2742     @param ninfo: the node to check
2743     @param nresult: the remote results for the node
2744     @param nimg: the node image object
2745     @param vg_name: the configured VG name
2746
2747     """
2748     node = ninfo.name
2749     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2750
2751     nimg.lvm_fail = True
2752     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2753     if vg_name is None:
2754       pass
2755     elif isinstance(lvdata, basestring):
2756       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2757                utils.SafeEncode(lvdata))
2758     elif not isinstance(lvdata, dict):
2759       _ErrorIf(True, constants.CV_ENODELVM, node,
2760                "rpc call to node failed (lvlist)")
2761     else:
2762       nimg.volumes = lvdata
2763       nimg.lvm_fail = False
2764
2765   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2766     """Verifies and updates the node instance list.
2767
2768     If the listing was successful, then updates this node's instance
2769     list. Otherwise, it marks the RPC call as failed for the instance
2770     list key.
2771
2772     @type ninfo: L{objects.Node}
2773     @param ninfo: the node to check
2774     @param nresult: the remote results for the node
2775     @param nimg: the node image object
2776
2777     """
2778     idata = nresult.get(constants.NV_INSTANCELIST, None)
2779     test = not isinstance(idata, list)
2780     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2781                   "rpc call to node failed (instancelist): %s",
2782                   utils.SafeEncode(str(idata)))
2783     if test:
2784       nimg.hyp_fail = True
2785     else:
2786       nimg.instances = idata
2787
2788   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2789     """Verifies and computes a node information map
2790
2791     @type ninfo: L{objects.Node}
2792     @param ninfo: the node to check
2793     @param nresult: the remote results for the node
2794     @param nimg: the node image object
2795     @param vg_name: the configured VG name
2796
2797     """
2798     node = ninfo.name
2799     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2800
2801     # try to read free memory (from the hypervisor)
2802     hv_info = nresult.get(constants.NV_HVINFO, None)
2803     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2804     _ErrorIf(test, constants.CV_ENODEHV, node,
2805              "rpc call to node failed (hvinfo)")
2806     if not test:
2807       try:
2808         nimg.mfree = int(hv_info["memory_free"])
2809       except (ValueError, TypeError):
2810         _ErrorIf(True, constants.CV_ENODERPC, node,
2811                  "node returned invalid nodeinfo, check hypervisor")
2812
2813     # FIXME: devise a free space model for file based instances as well
2814     if vg_name is not None:
2815       test = (constants.NV_VGLIST not in nresult or
2816               vg_name not in nresult[constants.NV_VGLIST])
2817       _ErrorIf(test, constants.CV_ENODELVM, node,
2818                "node didn't return data for the volume group '%s'"
2819                " - it is either missing or broken", vg_name)
2820       if not test:
2821         try:
2822           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2823         except (ValueError, TypeError):
2824           _ErrorIf(True, constants.CV_ENODERPC, node,
2825                    "node returned invalid LVM info, check LVM status")
2826
2827   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2828     """Gets per-disk status information for all instances.
2829
2830     @type nodelist: list of strings
2831     @param nodelist: Node names
2832     @type node_image: dict of (name, L{objects.Node})
2833     @param node_image: Node objects
2834     @type instanceinfo: dict of (name, L{objects.Instance})
2835     @param instanceinfo: Instance objects
2836     @rtype: {instance: {node: [(succes, payload)]}}
2837     @return: a dictionary of per-instance dictionaries with nodes as
2838         keys and disk information as values; the disk information is a
2839         list of tuples (success, payload)
2840
2841     """
2842     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2843
2844     node_disks = {}
2845     node_disks_devonly = {}
2846     diskless_instances = set()
2847     diskless = constants.DT_DISKLESS
2848
2849     for nname in nodelist:
2850       node_instances = list(itertools.chain(node_image[nname].pinst,
2851                                             node_image[nname].sinst))
2852       diskless_instances.update(inst for inst in node_instances
2853                                 if instanceinfo[inst].disk_template == diskless)
2854       disks = [(inst, disk)
2855                for inst in node_instances
2856                for disk in instanceinfo[inst].disks]
2857
2858       if not disks:
2859         # No need to collect data
2860         continue
2861
2862       node_disks[nname] = disks
2863
2864       # Creating copies as SetDiskID below will modify the objects and that can
2865       # lead to incorrect data returned from nodes
2866       devonly = [dev.Copy() for (_, dev) in disks]
2867
2868       for dev in devonly:
2869         self.cfg.SetDiskID(dev, nname)
2870
2871       node_disks_devonly[nname] = devonly
2872
2873     assert len(node_disks) == len(node_disks_devonly)
2874
2875     # Collect data from all nodes with disks
2876     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2877                                                           node_disks_devonly)
2878
2879     assert len(result) == len(node_disks)
2880
2881     instdisk = {}
2882
2883     for (nname, nres) in result.items():
2884       disks = node_disks[nname]
2885
2886       if nres.offline:
2887         # No data from this node
2888         data = len(disks) * [(False, "node offline")]
2889       else:
2890         msg = nres.fail_msg
2891         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2892                  "while getting disk information: %s", msg)
2893         if msg:
2894           # No data from this node
2895           data = len(disks) * [(False, msg)]
2896         else:
2897           data = []
2898           for idx, i in enumerate(nres.payload):
2899             if isinstance(i, (tuple, list)) and len(i) == 2:
2900               data.append(i)
2901             else:
2902               logging.warning("Invalid result from node %s, entry %d: %s",
2903                               nname, idx, i)
2904               data.append((False, "Invalid result from the remote node"))
2905
2906       for ((inst, _), status) in zip(disks, data):
2907         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2908
2909     # Add empty entries for diskless instances.
2910     for inst in diskless_instances:
2911       assert inst not in instdisk
2912       instdisk[inst] = {}
2913
2914     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2915                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2916                       compat.all(isinstance(s, (tuple, list)) and
2917                                  len(s) == 2 for s in statuses)
2918                       for inst, nnames in instdisk.items()
2919                       for nname, statuses in nnames.items())
2920     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2921
2922     return instdisk
2923
2924   @staticmethod
2925   def _SshNodeSelector(group_uuid, all_nodes):
2926     """Create endless iterators for all potential SSH check hosts.
2927
2928     """
2929     nodes = [node for node in all_nodes
2930              if (node.group != group_uuid and
2931                  not node.offline)]
2932     keyfunc = operator.attrgetter("group")
2933
2934     return map(itertools.cycle,
2935                [sorted(map(operator.attrgetter("name"), names))
2936                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2937                                                   keyfunc)])
2938
2939   @classmethod
2940   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2941     """Choose which nodes should talk to which other nodes.
2942
2943     We will make nodes contact all nodes in their group, and one node from
2944     every other group.
2945
2946     @warning: This algorithm has a known issue if one node group is much
2947       smaller than others (e.g. just one node). In such a case all other
2948       nodes will talk to the single node.
2949
2950     """
2951     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2952     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2953
2954     return (online_nodes,
2955             dict((name, sorted([i.next() for i in sel]))
2956                  for name in online_nodes))
2957
2958   def BuildHooksEnv(self):
2959     """Build hooks env.
2960
2961     Cluster-Verify hooks just ran in the post phase and their failure makes
2962     the output be logged in the verify output and the verification to fail.
2963
2964     """
2965     env = {
2966       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2967       }
2968
2969     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2970                for node in self.my_node_info.values())
2971
2972     return env
2973
2974   def BuildHooksNodes(self):
2975     """Build hooks nodes.
2976
2977     """
2978     return ([], self.my_node_names)
2979
2980   def Exec(self, feedback_fn):
2981     """Verify integrity of the node group, performing various test on nodes.
2982
2983     """
2984     # This method has too many local variables. pylint: disable=R0914
2985     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2986
2987     if not self.my_node_names:
2988       # empty node group
2989       feedback_fn("* Empty node group, skipping verification")
2990       return True
2991
2992     self.bad = False
2993     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2994     verbose = self.op.verbose
2995     self._feedback_fn = feedback_fn
2996
2997     vg_name = self.cfg.GetVGName()
2998     drbd_helper = self.cfg.GetDRBDHelper()
2999     cluster = self.cfg.GetClusterInfo()
3000     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3001     hypervisors = cluster.enabled_hypervisors
3002     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3003
3004     i_non_redundant = [] # Non redundant instances
3005     i_non_a_balanced = [] # Non auto-balanced instances
3006     i_offline = 0 # Count of offline instances
3007     n_offline = 0 # Count of offline nodes
3008     n_drained = 0 # Count of nodes being drained
3009     node_vol_should = {}
3010
3011     # FIXME: verify OS list
3012
3013     # File verification
3014     filemap = _ComputeAncillaryFiles(cluster, False)
3015
3016     # do local checksums
3017     master_node = self.master_node = self.cfg.GetMasterNode()
3018     master_ip = self.cfg.GetMasterIP()
3019
3020     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3021
3022     user_scripts = []
3023     if self.cfg.GetUseExternalMipScript():
3024       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3025
3026     node_verify_param = {
3027       constants.NV_FILELIST:
3028         utils.UniqueSequence(filename
3029                              for files in filemap
3030                              for filename in files),
3031       constants.NV_NODELIST:
3032         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3033                                   self.all_node_info.values()),
3034       constants.NV_HYPERVISOR: hypervisors,
3035       constants.NV_HVPARAMS:
3036         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3037       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3038                                  for node in node_data_list
3039                                  if not node.offline],
3040       constants.NV_INSTANCELIST: hypervisors,
3041       constants.NV_VERSION: None,
3042       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3043       constants.NV_NODESETUP: None,
3044       constants.NV_TIME: None,
3045       constants.NV_MASTERIP: (master_node, master_ip),
3046       constants.NV_OSLIST: None,
3047       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3048       constants.NV_USERSCRIPTS: user_scripts,
3049       }
3050
3051     if vg_name is not None:
3052       node_verify_param[constants.NV_VGLIST] = None
3053       node_verify_param[constants.NV_LVLIST] = vg_name
3054       node_verify_param[constants.NV_PVLIST] = [vg_name]
3055       node_verify_param[constants.NV_DRBDLIST] = None
3056
3057     if drbd_helper:
3058       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3059
3060     # bridge checks
3061     # FIXME: this needs to be changed per node-group, not cluster-wide
3062     bridges = set()
3063     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3064     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065       bridges.add(default_nicpp[constants.NIC_LINK])
3066     for instance in self.my_inst_info.values():
3067       for nic in instance.nics:
3068         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3069         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070           bridges.add(full_nic[constants.NIC_LINK])
3071
3072     if bridges:
3073       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3074
3075     # Build our expected cluster state
3076     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3077                                                  name=node.name,
3078                                                  vm_capable=node.vm_capable))
3079                       for node in node_data_list)
3080
3081     # Gather OOB paths
3082     oob_paths = []
3083     for node in self.all_node_info.values():
3084       path = _SupportsOob(self.cfg, node)
3085       if path and path not in oob_paths:
3086         oob_paths.append(path)
3087
3088     if oob_paths:
3089       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3090
3091     for instance in self.my_inst_names:
3092       inst_config = self.my_inst_info[instance]
3093
3094       for nname in inst_config.all_nodes:
3095         if nname not in node_image:
3096           gnode = self.NodeImage(name=nname)
3097           gnode.ghost = (nname not in self.all_node_info)
3098           node_image[nname] = gnode
3099
3100       inst_config.MapLVsByNode(node_vol_should)
3101
3102       pnode = inst_config.primary_node
3103       node_image[pnode].pinst.append(instance)
3104
3105       for snode in inst_config.secondary_nodes:
3106         nimg = node_image[snode]
3107         nimg.sinst.append(instance)
3108         if pnode not in nimg.sbp:
3109           nimg.sbp[pnode] = []
3110         nimg.sbp[pnode].append(instance)
3111
3112     # At this point, we have the in-memory data structures complete,
3113     # except for the runtime information, which we'll gather next
3114
3115     # Due to the way our RPC system works, exact response times cannot be
3116     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3117     # time before and after executing the request, we can at least have a time
3118     # window.
3119     nvinfo_starttime = time.time()
3120     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3121                                            node_verify_param,
3122                                            self.cfg.GetClusterName())
3123     nvinfo_endtime = time.time()
3124
3125     if self.extra_lv_nodes and vg_name is not None:
3126       extra_lv_nvinfo = \
3127           self.rpc.call_node_verify(self.extra_lv_nodes,
3128                                     {constants.NV_LVLIST: vg_name},
3129                                     self.cfg.GetClusterName())
3130     else:
3131       extra_lv_nvinfo = {}
3132
3133     all_drbd_map = self.cfg.ComputeDRBDMap()
3134
3135     feedback_fn("* Gathering disk information (%s nodes)" %
3136                 len(self.my_node_names))
3137     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3138                                      self.my_inst_info)
3139
3140     feedback_fn("* Verifying configuration file consistency")
3141
3142     # If not all nodes are being checked, we need to make sure the master node
3143     # and a non-checked vm_capable node are in the list.
3144     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3145     if absent_nodes:
3146       vf_nvinfo = all_nvinfo.copy()
3147       vf_node_info = list(self.my_node_info.values())
3148       additional_nodes = []
3149       if master_node not in self.my_node_info:
3150         additional_nodes.append(master_node)
3151         vf_node_info.append(self.all_node_info[master_node])
3152       # Add the first vm_capable node we find which is not included
3153       for node in absent_nodes:
3154         nodeinfo = self.all_node_info[node]
3155         if nodeinfo.vm_capable and not nodeinfo.offline:
3156           additional_nodes.append(node)
3157           vf_node_info.append(self.all_node_info[node])
3158           break
3159       key = constants.NV_FILELIST
3160       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3161                                                  {key: node_verify_param[key]},
3162                                                  self.cfg.GetClusterName()))
3163     else:
3164       vf_nvinfo = all_nvinfo
3165       vf_node_info = self.my_node_info.values()
3166
3167     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3168
3169     feedback_fn("* Verifying node status")
3170
3171     refos_img = None
3172
3173     for node_i in node_data_list:
3174       node = node_i.name
3175       nimg = node_image[node]
3176
3177       if node_i.offline:
3178         if verbose:
3179           feedback_fn("* Skipping offline node %s" % (node,))
3180         n_offline += 1
3181         continue
3182
3183       if node == master_node:
3184         ntype = "master"
3185       elif node_i.master_candidate:
3186         ntype = "master candidate"
3187       elif node_i.drained:
3188         ntype = "drained"
3189         n_drained += 1
3190       else:
3191         ntype = "regular"
3192       if verbose:
3193         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3194
3195       msg = all_nvinfo[node].fail_msg
3196       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3197                msg)
3198       if msg:
3199         nimg.rpc_fail = True
3200         continue
3201
3202       nresult = all_nvinfo[node].payload
3203
3204       nimg.call_ok = self._VerifyNode(node_i, nresult)
3205       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3206       self._VerifyNodeNetwork(node_i, nresult)
3207       self._VerifyNodeUserScripts(node_i, nresult)
3208       self._VerifyOob(node_i, nresult)
3209
3210       if nimg.vm_capable:
3211         self._VerifyNodeLVM(node_i, nresult, vg_name)
3212         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3213                              all_drbd_map)
3214
3215         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3216         self._UpdateNodeInstances(node_i, nresult, nimg)
3217         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3218         self._UpdateNodeOS(node_i, nresult, nimg)
3219
3220         if not nimg.os_fail:
3221           if refos_img is None:
3222             refos_img = nimg
3223           self._VerifyNodeOS(node_i, nimg, refos_img)
3224         self._VerifyNodeBridges(node_i, nresult, bridges)
3225
3226         # Check whether all running instancies are primary for the node. (This
3227         # can no longer be done from _VerifyInstance below, since some of the
3228         # wrong instances could be from other node groups.)
3229         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3230
3231         for inst in non_primary_inst:
3232           # FIXME: investigate best way to handle offline insts
3233           if inst.admin_state == constants.ADMINST_OFFLINE:
3234             if verbose:
3235               feedback_fn("* Skipping offline instance %s" % inst.name)
3236             i_offline += 1
3237             continue
3238           test = inst in self.all_inst_info
3239           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3240                    "instance should not run on node %s", node_i.name)
3241           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3242                    "node is running unknown instance %s", inst)
3243
3244     for node, result in extra_lv_nvinfo.items():
3245       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3246                               node_image[node], vg_name)
3247
3248     feedback_fn("* Verifying instance status")
3249     for instance in self.my_inst_names:
3250       if verbose:
3251         feedback_fn("* Verifying instance %s" % instance)
3252       inst_config = self.my_inst_info[instance]
3253       self._VerifyInstance(instance, inst_config, node_image,
3254                            instdisk[instance])
3255       inst_nodes_offline = []
3256
3257       pnode = inst_config.primary_node
3258       pnode_img = node_image[pnode]
3259       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3260                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3261                " primary node failed", instance)
3262
3263       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3264                pnode_img.offline,
3265                constants.CV_EINSTANCEBADNODE, instance,
3266                "instance is marked as running and lives on offline node %s",
3267                inst_config.primary_node)
3268
3269       # If the instance is non-redundant we cannot survive losing its primary
3270       # node, so we are not N+1 compliant. On the other hand we have no disk
3271       # templates with more than one secondary so that situation is not well
3272       # supported either.
3273       # FIXME: does not support file-backed instances
3274       if not inst_config.secondary_nodes:
3275         i_non_redundant.append(instance)
3276
3277       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3278                constants.CV_EINSTANCELAYOUT,
3279                instance, "instance has multiple secondary nodes: %s",
3280                utils.CommaJoin(inst_config.secondary_nodes),
3281                code=self.ETYPE_WARNING)
3282
3283       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3284         pnode = inst_config.primary_node
3285         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3286         instance_groups = {}
3287
3288         for node in instance_nodes:
3289           instance_groups.setdefault(self.all_node_info[node].group,
3290                                      []).append(node)
3291
3292         pretty_list = [
3293           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3294           # Sort so that we always list the primary node first.
3295           for group, nodes in sorted(instance_groups.items(),
3296                                      key=lambda (_, nodes): pnode in nodes,
3297                                      reverse=True)]
3298
3299         self._ErrorIf(len(instance_groups) > 1,
3300                       constants.CV_EINSTANCESPLITGROUPS,
3301                       instance, "instance has primary and secondary nodes in"
3302                       " different groups: %s", utils.CommaJoin(pretty_list),
3303                       code=self.ETYPE_WARNING)
3304
3305       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3306         i_non_a_balanced.append(instance)
3307
3308       for snode in inst_config.secondary_nodes:
3309         s_img = node_image[snode]
3310         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3311                  snode, "instance %s, connection to secondary node failed",
3312                  instance)
3313
3314         if s_img.offline:
3315           inst_nodes_offline.append(snode)
3316
3317       # warn that the instance lives on offline nodes
3318       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3319                "instance has offline secondary node(s) %s",
3320                utils.CommaJoin(inst_nodes_offline))
3321       # ... or ghost/non-vm_capable nodes
3322       for node in inst_config.all_nodes:
3323         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3324                  instance, "instance lives on ghost node %s", node)
3325         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3326                  instance, "instance lives on non-vm_capable node %s", node)
3327
3328     feedback_fn("* Verifying orphan volumes")
3329     reserved = utils.FieldSet(*cluster.reserved_lvs)
3330
3331     # We will get spurious "unknown volume" warnings if any node of this group
3332     # is secondary for an instance whose primary is in another group. To avoid
3333     # them, we find these instances and add their volumes to node_vol_should.
3334     for inst in self.all_inst_info.values():
3335       for secondary in inst.secondary_nodes:
3336         if (secondary in self.my_node_info
3337             and inst.name not in self.my_inst_info):
3338           inst.MapLVsByNode(node_vol_should)
3339           break
3340
3341     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3342
3343     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3344       feedback_fn("* Verifying N+1 Memory redundancy")
3345       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3346
3347     feedback_fn("* Other Notes")
3348     if i_non_redundant:
3349       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3350                   % len(i_non_redundant))
3351
3352     if i_non_a_balanced:
3353       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3354                   % len(i_non_a_balanced))
3355
3356     if i_offline:
3357       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3358
3359     if n_offline:
3360       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3361
3362     if n_drained:
3363       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3364
3365     return not self.bad
3366
3367   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3368     """Analyze the post-hooks' result
3369
3370     This method analyses the hook result, handles it, and sends some
3371     nicely-formatted feedback back to the user.
3372
3373     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3374         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3375     @param hooks_results: the results of the multi-node hooks rpc call
3376     @param feedback_fn: function used send feedback back to the caller
3377     @param lu_result: previous Exec result
3378     @return: the new Exec result, based on the previous result
3379         and hook results
3380
3381     """
3382     # We only really run POST phase hooks, only for non-empty groups,
3383     # and are only interested in their results
3384     if not self.my_node_names:
3385       # empty node group
3386       pass
3387     elif phase == constants.HOOKS_PHASE_POST:
3388       # Used to change hooks' output to proper indentation
3389       feedback_fn("* Hooks Results")
3390       assert hooks_results, "invalid result from hooks"
3391
3392       for node_name in hooks_results:
3393         res = hooks_results[node_name]
3394         msg = res.fail_msg
3395         test = msg and not res.offline
3396         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3397                       "Communication failure in hooks execution: %s", msg)
3398         if res.offline or msg:
3399           # No need to investigate payload if node is offline or gave
3400           # an error.
3401           continue
3402         for script, hkr, output in res.payload:
3403           test = hkr == constants.HKR_FAIL
3404           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3405                         "Script %s failed, output:", script)
3406           if test:
3407             output = self._HOOKS_INDENT_RE.sub("      ", output)
3408             feedback_fn("%s" % output)
3409             lu_result = False
3410
3411     return lu_result
3412
3413
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The actual work is delegated: one L{opcodes.OpGroupVerifyDisks} job is
  submitted per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node group locks, using C{_ShareAll()} for sharing.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submit one disk verification job per owned node group.

    @param feedback_fn: feedback function (unused here)
    @return: a L{ResultWithJobs} holding one single-opcode job per group

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # Each job consists of exactly one L{opcodes.OpGroupVerifyDisks}
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3432
3433
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and declare the (initially empty) lock lists.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    # The lock names themselves are filled in level by level in DeclareLocks;
    # every level gets its own, separate list
    self.needed_locks = dict((level, [])
                             for level in (locking.LEVEL_INSTANCE,
                                           locking.LEVEL_NODEGROUP,
                                           locking.LEVEL_NODE))

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires
      # going via the node before it's locked, requiring verification
      # later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Check that the optimistically acquired locks are still correct.

    Also loads the configuration objects of all owned instances into
    C{self.instances}, keyed as returned by C{cfg.GetMultiInstanceInfo}.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert locked_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      groups_of_inst = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                                locked_groups)

      assert self.group_uuid in groups_of_inst, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose admin state is L{constants.ADMINST_UP} are
    considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = set()
    missing_lvs = {}

    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    lv_owner = _MapInstanceDisksToNodes(running)

    if not lv_owner:
      # No volumes to look for, hence nothing to ask the nodes
      return (node_errors, list(need_activation), missing_lvs)

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    lv_lists = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in lv_lists.items():
      if node_res.offline:
        continue

      failure = node_res.fail_msg
      if failure:
        logging.warning("Error enumerating LVs on node %s: %s", node, failure)
        node_errors[node] = failure
        continue

      for (lv_name, (_, _, lv_online)) in node_res.payload.items():
        # Volumes reported by the node are removed from the expected set
        owner = lv_owner.pop((node, lv_name), None)
        if owner is not None and not lv_online:
          need_activation.add(owner)

    # any leftover items in lv_owner are missing LVs, let's arrange the data
    # better
    for (key, owner) in lv_owner.iteritems():
      missing_lvs.setdefault(owner, []).append(list(key))

    return (node_errors, list(need_activation), missing_lvs)
3551
3552
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare instance and node-resource locks.

    With an explicit instance list only those instances are locked and the
    node-resource locks are recalculated later; otherwise all instances and
    all node resources are locked.

    """
    requested = self.op.instances
    if requested:
      self.wanted_names = _GetWantedInstances(self, requested)
      self.needed_locks = {
        locking.LEVEL_INSTANCE: self.wanted_names,
        locking.LEVEL_NODE_RES: [],
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = dict.fromkeys([locking.LEVEL_INSTANCE,
                                         locking.LEVEL_NODE_RES],
                                        locking.ALL_SET)

    # Node resources are shared (1), instances are not (0)
    self.share_locks = {
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_RES: 1,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes' resources of explicitly requested instances.

    @param level: the locking level for which locks are being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    # GetMultiInstanceInfo yields pairs; only the second item is kept
    self.wanted_instances = \
        [inst for (_, inst) in self.cfg.GetMultiInstanceInfo(self.wanted_names)]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether any size in the chain of first children was changed;
        always C{False} for non-DRBD8 disks

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The recorded size of each disk is compared with the size reported by
    the instance's primary node, shifted right by 20 bits (presumably a
    bytes-to-mebibytes conversion). Mismatches are corrected in the
    configuration.

    @param feedback_fn: feedback function, passed on to C{cfg.Update}
    @return: list of (instance name, disk index, new size) tuples, one per
        correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      # The node entry is created even for instances without any disk
      node_entries = per_node_disks.setdefault(instance.primary_node, [])
      node_entries.extend((instance, idx, disk)
                          for (idx, disk) in enumerate(instance.disks))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, entries) in per_node_disks.items():
      # Work on copies so that setting the disk ID doesn't touch the config
      disk_copies = [disk.Copy() for (_, _, disk) in entries]
      for dcopy in disk_copies:
        self.cfg.SetDiskID(dcopy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(entries, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3673
3674
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name (C{OP_TARGET}) and the
        requested name (C{NEW_NAME})

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list holding only the master node, list of all nodes)

    """
    master_only = [self.cfg.GetMasterNode()]
    every_node = self.cfg.GetNodeList()
    return (master_only, every_node)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, stored back (in resolved form) in C{self.op.name}
    and its IP address is saved in C{self.ip}.

    @raise errors.OpPrereqError: if neither name nor IP address would
        change, or if a changed IP address already answers on the node
        daemon port

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = (new_ip != old_ip)

    if new_name == old_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address must not be answering on the network already
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master IP is deactivated, the configuration and the known hosts
    file are updated, and the master IP is activated again (with the new
    address) no matter whether the update succeeded.

    @param feedback_fn: feedback function, passed on to C{cfg.Update}
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    deactivation = \
      self.rpc.call_node_deactivate_master_ip(master_params.name,
                                              master_params, use_ext_script)
    deactivation.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)

      # The master is left out of the list of nodes to upload to
      other_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in other_nodes:
        other_nodes.remove(master_params.name)
      _UploadHelper(self, other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # Always try to bring the master IP back, using the new address
      master_params.ip = new_ip
      activation = self.rpc.call_node_activate_master_ip(master_params.name,
                                                         master_params,
                                                         use_ext_script)
      failure = activation.fail_msg
      if failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", failure)

    return new_name
3758
3759
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the cluster's
  primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error carries
      the L{errors.ECODE_INVAL} error code

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Report an unknown family as a prerequisite failure, with an error
    # code like the other OpPrereqError users in this module
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               netmask, errors.ECODE_INVAL)
3779
3780
3781 class LUClusterSetParams(LogicalUnit):
3782   """Change the parameters of the cluster.
3783
3784   """
3785   HPATH = "cluster-modify"
3786   HTYPE = constants.HTYPE_CLUSTER
3787   REQ_BGL = False
3788
3789   def CheckArguments(self):
3790     """Check parameters
3791
3792     """
3793     if self.op.uid_pool:
3794       uidpool.CheckUidPool(self.op.uid_pool)
3795
3796     if self.op.add_uids:
3797       uidpool.CheckUidPool(self.op.add_uids)
3798
3799     if self.op.remove_uids:
3800       uidpool.CheckUidPool(self.op.remove_uids)
3801
3802     if self.op.master_netmask is not None:
3803       _ValidateNetmask(self.cfg, self.op.master_netmask)
3804
3805     if self.op.diskparams:
3806       for dt_params in self.op.diskparams.values():
3807         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3808
3809   def ExpandNames(self):
3810     # FIXME: in the future maybe other cluster params won't require checking on
3811     # all nodes to be modified.
3812     self.needed_locks = {
3813       locking.LEVEL_NODE: locking.ALL_SET,
3814       locking.LEVEL_INSTANCE: locking.ALL_SET,
3815       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3816     }
3817     self.share_locks = {
3818         locking.LEVEL_NODE: 1,
3819         locking.LEVEL_INSTANCE: 1,
3820         locking.LEVEL_NODEGROUP: 1,
3821     }
3822
3823   def BuildHooksEnv(self):
3824     """Build hooks env.
3825
3826     """
3827     return {
3828       "OP_TARGET": self.cfg.GetClusterName(),
3829       "NEW_VG_NAME": self.op.vg_name,
3830       }
3831
3832   def BuildHooksNodes(self):
3833     """Build hooks nodes.
3834
3835     """
3836     mn = self.cfg.GetMasterNode()
3837     return ([mn], [mn])
3838
3839   def CheckPrereq(self):
3840     """Check prerequisites.
3841
3842     This checks whether the given params don't conflict and
3843     if the given volume group is valid.
3844
3845     """
3846     if self.op.vg_name is not None and not self.op.vg_name:
3847       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3848         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3849                                    " instances exist", errors.ECODE_INVAL)
3850
3851     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3852       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3853         raise errors.OpPrereqError("Cannot disable drbd helper while"
3854                                    " drbd-based instances exist",
3855                                    errors.ECODE_INVAL)
3856
3857     node_list = self.owned_locks(locking.LEVEL_NODE)
3858
3859     # if vg_name not None, checks given volume group on all nodes
3860     if self.op.vg_name:
3861       vglist = self.rpc.call_vg_list(node_list)
3862       for node in node_list:
3863         msg = vglist[node].fail_msg
3864         if msg:
3865           # ignoring down node
3866           self.LogWarning("Error while gathering data on node %s"
3867                           " (ignoring node): %s", node, msg)
3868           continue
3869         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3870                                               self.op.vg_name,
3871                                               constants.MIN_VG_SIZE)
3872         if vgstatus:
3873           raise errors.OpPrereqError("Error on node '%s': %s" %
3874                                      (node, vgstatus), errors.ECODE_ENVIRON)
3875
3876     if self.op.drbd_helper:
3877       # checks given drbd helper on all nodes
3878       helpers = self.rpc.call_drbd_helper(node_list)
3879       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3880         if ninfo.offline:
3881           self.LogInfo("Not checking drbd helper on offline node %s", node)
3882           continue
3883         msg = helpers[node].fail_msg
3884         if msg:
3885           raise errors.OpPrereqError("Error checking drbd helper on node"
3886                                      " '%s': %s" % (node, msg),
3887                                      errors.ECODE_ENVIRON)
3888         node_helper = helpers[node].payload
3889         if node_helper != self.op.drbd_helper:
3890           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3891                                      (node, node_helper), errors.ECODE_ENVIRON)
3892
3893     self.cluster = cluster = self.cfg.GetClusterInfo()
3894     # validate params changes
3895     if self.op.beparams:
3896       objects.UpgradeBeParams(self.op.beparams)
3897       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3898       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3899
3900     if self.op.ndparams:
3901       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3902       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3903
3904       # TODO: we need a more general way to handle resetting
3905       # cluster-level parameters to default values
3906       if self.new_ndparams["oob_program"] == "":
3907         self.new_ndparams["oob_program"] = \
3908             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3909
3910     if self.op.hv_state:
3911       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3912                                             self.cluster.hv_state_static)
3913       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3914                                for hv, values in new_hv_state.items())
3915
3916     if self.op.disk_state:
3917       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3918                                                 self.cluster.disk_state_static)
3919       self.new_disk_state = \
3920         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3921                             for name, values in svalues.items()))
3922              for storage, svalues in new_disk_state.items())
3923
3924     if self.op.ipolicy:
3925       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3926                                             group_policy=False)
3927
3928       all_instances = self.cfg.GetAllInstancesInfo().values()
3929       violations = set()
3930       for group in self.cfg.GetAllNodeGroupsInfo().values():
3931         instances = frozenset([inst for inst in all_instances
3932                                if compat.any(node in group.members
3933                                              for node in inst.all_nodes)])
3934         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3935         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3936                                                                    group),
3937                                             new_ipolicy, instances)
3938         if new:
3939           violations.update(new)
3940
3941       if violations:
3942         self.LogWarning("After the ipolicy change the following instances"
3943                         " violate them: %s",
3944                         utils.CommaJoin(violations))
3945
3946     if self.op.nicparams:
3947       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3948       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3949       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3950       nic_errors = []
3951
3952       # check all instances for consistency
3953       for instance in self.cfg.GetAllInstancesInfo().values():
3954         for nic_idx, nic in enumerate(instance.nics):
3955           params_copy = copy.deepcopy(nic.nicparams)
3956           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3957
3958           # check parameter syntax
3959           try:
3960             objects.NIC.CheckParameterSyntax(params_filled)
3961           except errors.ConfigurationError, err:
3962             nic_errors.append("Instance %s, nic/%d: %s" %
3963                               (instance.name, nic_idx, err))
3964
3965           # if we're moving instances to routed, check that they have an ip
3966           target_mode = params_filled[constants.NIC_MODE]
3967           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3968             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3969                               " address" % (instance.name, nic_idx))
3970       if nic_errors:
3971         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3972                                    "\n".join(nic_errors))
3973
3974     # hypervisor list/parameters
3975     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3976     if self.op.hvparams:
3977       for hv_name, hv_dict in self.op.hvparams.items():
3978         if hv_name not in self.new_hvparams:
3979           self.new_hvparams[hv_name] = hv_dict
3980         else:
3981           self.new_hvparams[hv_name].update(hv_dict)
3982
3983     # disk template parameters
3984     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3985     if self.op.diskparams:
3986       for dt_name, dt_params in self.op.diskparams.items():
3987         if dt_name not in self.op.diskparams:
3988           self.new_diskparams[dt_name] = dt_params
3989         else:
3990           self.new_diskparams[dt_name].update(dt_params)
3991
3992     # os hypervisor parameters
3993     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3994     if self.op.os_hvp:
3995       for os_name, hvs in self.op.os_hvp.items():
3996         if os_name not in self.new_os_hvp:
3997           self.new_os_hvp[os_name] = hvs
3998         else:
3999           for hv_name, hv_dict in hvs.items():
4000             if hv_name not in self.new_os_hvp[os_name]:
4001               self.new_os_hvp[os_name][hv_name] = hv_dict
4002             else:
4003               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4004
4005     # os parameters
4006     self.new_osp = objects.FillDict(cluster.osparams, {})
4007     if self.op.osparams:
4008       for os_name, osp in self.op.osparams.items():
4009         if os_name not in self.new_osp:
4010           self.new_osp[os_name] = {}
4011
4012         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4013                                                   use_none=True)
4014
4015         if not self.new_osp[os_name]:
4016           # we removed all parameters
4017           del self.new_osp[os_name]
4018         else:
4019           # check the parameter validity (remote check)
4020           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4021                          os_name, self.new_osp[os_name])
4022
4023     # changes to the hypervisor list
4024     if self.op.enabled_hypervisors is not None:
4025       self.hv_list = self.op.enabled_hypervisors
4026       for hv in self.hv_list:
4027         # if the hypervisor doesn't already exist in the cluster
4028         # hvparams, we initialize it to empty, and then (in both
4029         # cases) we make sure to fill the defaults, as we might not
4030         # have a complete defaults list if the hypervisor wasn't
4031         # enabled before
4032         if hv not in new_hvp:
4033           new_hvp[hv] = {}
4034         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4035         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4036     else:
4037       self.hv_list = cluster.enabled_hypervisors
4038
4039     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4040       # either the enabled list has changed, or the parameters have, validate
4041       for hv_name, hv_params in self.new_hvparams.items():
4042         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4043             (self.op.enabled_hypervisors and
4044              hv_name in self.op.enabled_hypervisors)):
4045           # either this is a new hypervisor, or its parameters have changed
4046           hv_class = hypervisor.GetHypervisor(hv_name)
4047           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4048           hv_class.CheckParameterSyntax(hv_params)
4049           _CheckHVParams(self, node_list, hv_name, hv_params)
4050
4051     if self.op.os_hvp:
4052       # no need to check any newly-enabled hypervisors, since the
4053       # defaults have already been checked in the above code-block
4054       for os_name, os_hvp in self.new_os_hvp.items():
4055         for hv_name, hv_params in os_hvp.items():
4056           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4057           # we need to fill in the new os_hvp on top of the actual hv_p
4058           cluster_defaults = self.new_hvparams.get(hv_name, {})
4059           new_osp = objects.FillDict(cluster_defaults, hv_params)
4060           hv_class = hypervisor.GetHypervisor(hv_name)
4061           hv_class.CheckParameterSyntax(new_osp)
4062           _CheckHVParams(self, node_list, hv_name, new_osp)
4063
4064     if self.op.default_iallocator:
4065       alloc_script = utils.FindFile(self.op.default_iallocator,
4066                                     constants.IALLOCATOR_SEARCH_PATH,
4067                                     os.path.isfile)
4068       if alloc_script is None:
4069         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4070                                    " specified" % self.op.default_iallocator,
4071                                    errors.ECODE_INVAL)
4072
4073   def Exec(self, feedback_fn):
4074     """Change the parameters of the cluster.
4075
4076     """
4077     if self.op.vg_name is not None:
4078       new_volume = self.op.vg_name
4079       if not new_volume:
4080         new_volume = None
4081       if new_volume != self.cfg.GetVGName():
4082         self.cfg.SetVGName(new_volume)
4083       else:
4084         feedback_fn("Cluster LVM configuration already in desired"
4085                     " state, not changing")
4086     if self.op.drbd_helper is not None:
4087       new_helper = self.op.drbd_helper
4088       if not new_helper:
4089         new_helper = None
4090       if new_helper != self.cfg.GetDRBDHelper():
4091         self.cfg.SetDRBDHelper(new_helper)
4092       else:
4093         feedback_fn("Cluster DRBD helper already in desired state,"
4094                     " not changing")
4095     if self.op.hvparams:
4096       self.cluster.hvparams = self.new_hvparams
4097     if self.op.os_hvp:
4098       self.cluster.os_hvp = self.new_os_hvp
4099     if self.op.enabled_hypervisors is not None:
4100       self.cluster.hvparams = self.new_hvparams
4101       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4102     if self.op.beparams:
4103       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4104     if self.op.nicparams:
4105       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4106     if self.op.ipolicy:
4107       self.cluster.ipolicy = self.new_ipolicy
4108     if self.op.osparams:
4109       self.cluster.osparams = self.new_osp
4110     if self.op.ndparams:
4111       self.cluster.ndparams = self.new_ndparams
4112     if self.op.diskparams:
4113       self.cluster.diskparams = self.new_diskparams
4114     if self.op.hv_state:
4115       self.cluster.hv_state_static = self.new_hv_state
4116     if self.op.disk_state:
4117       self.cluster.disk_state_static = self.new_disk_state
4118
4119     if self.op.candidate_pool_size is not None:
4120       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4121       # we need to update the pool size here, otherwise the save will fail
4122       _AdjustCandidatePool(self, [])
4123
4124     if self.op.maintain_node_health is not None:
4125       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4126         feedback_fn("Note: CONFD was disabled at build time, node health"
4127                     " maintenance is not useful (still enabling it)")
4128       self.cluster.maintain_node_health = self.op.maintain_node_health
4129
4130     if self.op.prealloc_wipe_disks is not None:
4131       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4132
4133     if self.op.add_uids is not None:
4134       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4135
4136     if self.op.remove_uids is not None:
4137       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4138
4139     if self.op.uid_pool is not None:
4140       self.cluster.uid_pool = self.op.uid_pool
4141
4142     if self.op.default_iallocator is not None:
4143       self.cluster.default_iallocator = self.op.default_iallocator
4144
4145     if self.op.reserved_lvs is not None:
4146       self.cluster.reserved_lvs = self.op.reserved_lvs
4147
4148     if self.op.use_external_mip_script is not None:
4149       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4150
4151     def helper_os(aname, mods, desc):
4152       desc += " OS list"
4153       lst = getattr(self.cluster, aname)
4154       for key, val in mods:
4155         if key == constants.DDM_ADD:
4156           if val in lst:
4157             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4158           else:
4159             lst.append(val)
4160         elif key == constants.DDM_REMOVE:
4161           if val in lst:
4162             lst.remove(val)
4163           else:
4164             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4165         else:
4166           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4167
4168     if self.op.hidden_os:
4169       helper_os("hidden_os", self.op.hidden_os, "hidden")
4170
4171     if self.op.blacklisted_os:
4172       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4173
4174     if self.op.master_netdev:
4175       master_params = self.cfg.GetMasterNetworkParameters()
4176       ems = self.cfg.GetUseExternalMipScript()
4177       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4178                   self.cluster.master_netdev)
4179       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4180                                                        master_params, ems)
4181       result.Raise("Could not disable the master ip")
4182       feedback_fn("Changing master_netdev from %s to %s" %
4183                   (master_params.netdev, self.op.master_netdev))
4184       self.cluster.master_netdev = self.op.master_netdev
4185
4186     if self.op.master_netmask:
4187       master_params = self.cfg.GetMasterNetworkParameters()
4188       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4189       result = self.rpc.call_node_change_master_netmask(master_params.name,
4190                                                         master_params.netmask,
4191                                                         self.op.master_netmask,
4192                                                         master_params.ip,
4193                                                         master_params.netdev)
4194       if result.fail_msg:
4195         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4196         feedback_fn(msg)
4197
4198       self.cluster.master_netmask = self.op.master_netmask
4199
4200     self.cfg.Update(self.cluster, feedback_fn)
4201
4202     if self.op.master_netdev:
4203       master_params = self.cfg.GetMasterNetworkParameters()
4204       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4205                   self.op.master_netdev)
4206       ems = self.cfg.GetUseExternalMipScript()
4207       result = self.rpc.call_node_activate_master_ip(master_params.name,
4208                                                      master_params, ems)
4209       if result.fail_msg:
4210         self.LogWarning("Could not re-enable the master ip on"
4211                         " the master, please restart manually: %s",
4212                         result.fail_msg)
4213
4214
4215 def _UploadHelper(lu, nodes, fname):
4216   """Helper for uploading a file and showing warnings.
4217
4218   """
4219   if os.path.exists(fname):
4220     result = lu.rpc.call_upload_file(nodes, fname)
4221     for to_node, to_result in result.items():
4222       msg = to_result.fail_msg
4223       if msg:
4224         msg = ("Copy of file %s to node %s failed: %s" %
4225                (fname, to_node, msg))
4226         lu.proc.LogWarning(msg)
4227
4228
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: files for all nodes, optional
      files, files for master candidates only, files for VM-capable
      nodes only

  """
  # Files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on VM-capable nodes: first element of
  # what each enabled hypervisor reports
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    files_vm.update(hv_files[0])

  # The second element lists the hypervisor files which are optional
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    files_opt.update(hv_files[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(files_all) + len(files_mc) + len(files_vm) ==
          len(all_files_set)), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4293
4294
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the nodes which need them.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied. The master node is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Collect the target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master node is the source of the files, drop it from the targets
  for targets in (online_nodes, vm_nodes):
    try:
      targets.remove(master_info.name)
    except ValueError:
      # master was not in this list
      pass

  # Collect the files (only those which are to be redistributed)
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file must never be re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload every file to the nodes of its category
  for (targets, filenames) in [(online_nodes, files_all),
                               (vm_nodes, files_vm)]:
    for fname in filenames:
      _UploadHelper(lu, targets, fname)
4343
4344
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The cluster object is written back unchanged through the
  configuration and the ancillary files are copied again.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback to the caller

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4365
4366
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The RPC call goes to the node named in the master network
    parameters; a failed call is raised as an error.

    """
    params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    activation = self.rpc.call_node_activate_master_ip(params.name, params,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4380
4381
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The RPC call goes to the node named in the master network
    parameters; a failed call is raised as an error.

    """
    params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(params.name, params,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4395
4396
4397 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4398   """Sleep and poll for an instance's disk to sync.
4399
4400   """
4401   if not instance.disks or disks is not None and not disks:
4402     return True
4403
4404   disks = _ExpandCheckDisks(instance, disks)
4405
4406   if not oneshot:
4407     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4408
4409   node = instance.primary_node
4410
4411   for dev in disks:
4412     lu.cfg.SetDiskID(dev, node)
4413
4414   # TODO: Convert to utils.Retry
4415
4416   retries = 0
4417   degr_retries = 10 # in seconds, as we sleep 1 second each time
4418   while True:
4419     max_time = 0
4420     done = True
4421     cumul_degraded = False
4422     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4423     msg = rstats.fail_msg
4424     if msg:
4425       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4426       retries += 1
4427       if retries >= 10:
4428         raise errors.RemoteError("Can't contact node %s for mirror data,"
4429                                  " aborting." % node)
4430       time.sleep(6)
4431       continue
4432     rstats = rstats.payload
4433     retries = 0
4434     for i, mstat in enumerate(rstats):
4435       if mstat is None:
4436         lu.LogWarning("Can't compute data for node %s/%s",
4437                            node, disks[i].iv_name)
4438         continue
4439
4440       cumul_degraded = (cumul_degraded or
4441                         (mstat.is_degraded and mstat.sync_percent is None))
4442       if mstat.sync_percent is not None:
4443         done = False
4444         if mstat.estimated_time is not None:
4445           rem_time = ("%s remaining (estimated)" %
4446                       utils.FormatSeconds(mstat.estimated_time))
4447           max_time = mstat.estimated_time
4448         else:
4449           rem_time = "no time estimate"
4450         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4451                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4452
4453     # if we're done but degraded, let's do a few small retries, to
4454     # make sure we see a stable and not transient situation; therefore
4455     # we force restart of the loop
4456     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4457       logging.info("Degraded disks found, %d retries left", degr_retries)
4458       degr_retries -= 1
4459       time.sleep(1)
4460       continue
4461
4462     if done or oneshot:
4463       break
4464
4465     time.sleep(min(60, max_time))
4466
4467   if done:
4468     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4469   return not cumul_degraded
4470
4471
4472 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4473   """Check that mirrors are not degraded.
4474
4475   The ldisk parameter, if True, will change the test from the
4476   is_degraded attribute (which represents overall non-ok status for
4477   the device(s)) to the ldisk (representing the local storage status).
4478
4479   """
4480   lu.cfg.SetDiskID(dev, node)
4481
4482   result = True
4483
4484   if on_primary or dev.AssembleOnSecondary():
4485     rstats = lu.rpc.call_blockdev_find(node, dev)
4486     msg = rstats.fail_msg
4487     if msg:
4488       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4489       result = False
4490     elif not rstats.payload:
4491       lu.LogWarning("Can't find disk on node %s", node)
4492       result = False
4493     else:
4494       if ldisk:
4495         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4496       else:
4497         result = result and not rstats.payload.is_degraded
4498
4499   if dev.children:
4500     for child in dev.children:
4501       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4502
4503   return result
4504
4505
4506 class LUOobCommand(NoHooksLU):
4507   """Logical unit for OOB handling.
4508
4509   """
4510   REG_BGL = False
4511   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4512
4513   def ExpandNames(self):
4514     """Gather locks we need.
4515
4516     """
4517     if self.op.node_names:
4518       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4519       lock_names = self.op.node_names
4520     else:
4521       lock_names = locking.ALL_SET
4522
4523     self.needed_locks = {
4524       locking.LEVEL_NODE: lock_names,
4525       }
4526
4527   def CheckPrereq(self):
4528     """Check prerequisites.
4529
4530     This checks:
4531      - the node exists in the configuration
4532      - OOB is supported
4533
4534     Any errors are signaled by raising errors.OpPrereqError.
4535
4536     """
4537     self.nodes = []
4538     self.master_node = self.cfg.GetMasterNode()
4539
4540     assert self.op.power_delay >= 0.0
4541
4542     if self.op.node_names:
4543       if (self.op.command in self._SKIP_MASTER and
4544           self.master_node in self.op.node_names):
4545         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4546         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4547
4548         if master_oob_handler:
4549           additional_text = ("run '%s %s %s' if you want to operate on the"
4550                              " master regardless") % (master_oob_handler,
4551                                                       self.op.command,
4552                                                       self.master_node)
4553         else:
4554           additional_text = "it does not support out-of-band operations"
4555
4556         raise errors.OpPrereqError(("Operating on the master node %s is not"
4557                                     " allowed for %s; %s") %
4558                                    (self.master_node, self.op.command,
4559                                     additional_text), errors.ECODE_INVAL)
4560     else:
4561       self.op.node_names = self.cfg.GetNodeList()
4562       if self.op.command in self._SKIP_MASTER:
4563         self.op.node_names.remove(self.master_node)
4564
4565     if self.op.command in self._SKIP_MASTER:
4566       assert self.master_node not in self.op.node_names
4567
4568     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4569       if node is None:
4570         raise errors.OpPrereqError("Node %s not found" % node_name,
4571                                    errors.ECODE_NOENT)
4572       else:
4573         self.nodes.append(node)
4574
4575       if (not self.op.ignore_status and
4576           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4577         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4578                                     " not marked offline") % node_name,
4579                                    errors.ECODE_STATE)
4580
4581   def Exec(self, feedback_fn):
4582     """Execute OOB and return result if we expect any.
4583
4584     """
4585     master_node = self.master_node
4586     ret = []
4587
4588     for idx, node in enumerate(utils.NiceSort(self.nodes,
4589                                               key=lambda node: node.name)):
4590       node_entry = [(constants.RS_NORMAL, node.name)]
4591       ret.append(node_entry)
4592
4593       oob_program = _SupportsOob(self.cfg, node)
4594
4595       if not oob_program:
4596         node_entry.append((constants.RS_UNAVAIL, None))
4597         continue
4598
4599       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4600                    self.op.command, oob_program, node.name)
4601       result = self.rpc.call_run_oob(master_node, oob_program,
4602                                      self.op.command, node.name,
4603                                      self.op.timeout)
4604
4605       if result.fail_msg:
4606         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4607                         node.name, result.fail_msg)
4608         node_entry.append((constants.RS_NODATA, None))
4609       else:
4610         try:
4611           self._CheckPayload(result)
4612         except errors.OpExecError, err:
4613           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4614                           node.name, err)
4615           node_entry.append((constants.RS_NODATA, None))
4616         else:
4617           if self.op.command == constants.OOB_HEALTH:
4618             # For health we should log important events
4619             for item, status in result.payload:
4620               if status in [constants.OOB_STATUS_WARNING,
4621                             constants.OOB_STATUS_CRITICAL]:
4622                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4623                                 item, node.name, status)
4624
4625           if self.op.command == constants.OOB_POWER_ON:
4626             node.powered = True
4627           elif self.op.command == constants.OOB_POWER_OFF:
4628             node.powered = False
4629           elif self.op.command == constants.OOB_POWER_STATUS:
4630             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4631             if powered != node.powered:
4632               logging.warning(("Recorded power state (%s) of node '%s' does not"
4633                                " match actual power state (%s)"), node.powered,
4634                               node.name, powered)
4635
4636           # For configuration changing commands we should update the node
4637           if self.op.command in (constants.OOB_POWER_ON,
4638                                  constants.OOB_POWER_OFF):
4639             self.cfg.Update(node, feedback_fn)
4640
4641           node_entry.append((constants.RS_NORMAL, result.payload))
4642
4643           if (self.op.command == constants.OOB_POWER_ON and
4644               idx < len(self.nodes) - 1):
4645             time.sleep(self.op.power_delay)
4646
4647     return ret
4648
4649   def _CheckPayload(self, result):
4650     """Checks if the payload is valid.
4651
4652     @param result: RPC result
4653     @raises errors.OpExecError: If payload is not valid
4654
4655     """
4656     errs = []
4657     if self.op.command == constants.OOB_HEALTH:
4658       if not isinstance(result.payload, list):
4659         errs.append("command 'health' is expected to return a list but got %s" %
4660                     type(result.payload))
4661       else:
4662         for item, status in result.payload:
4663           if status not in constants.OOB_STATUSES:
4664             errs.append("health item '%s' has invalid status '%s'" %
4665                         (item, status))
4666
4667     if self.op.command == constants.OOB_POWER_STATUS:
4668       if not isinstance(result.payload, dict):
4669         errs.append("power-status is expected to return a dict but got %s" %
4670                     type(result.payload))
4671
4672     if self.op.command in [
4673         constants.OOB_POWER_ON,
4674         constants.OOB_POWER_OFF,
4675         constants.OOB_POWER_CYCLE,
4676         ]:
4677       if result.payload is not None:
4678         errs.append("%s is expected to not return payload but got '%s'" %
4679                     (self.op.command, result.payload))
4680
4681     if errs:
4682       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4683                                utils.CommaJoin(errs))
4684
4685
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declare the (currently empty) locks and the wanted names.

    """
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are used.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    os_map = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [nname for nname in rlist if not rlist[nname].fail_msg]
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in node_result.payload:
        if name not in os_map:
          # first time this OS is seen: every good node starts with
          # an empty list
          os_map[name] = dict((nname, []) for nname in good_nodes)
        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(entry) for entry in params]
        os_map[name][node_name].append((path, status, diagnose, variants,
                                        param_tuples, api_versions))
    return os_map

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    An OS stays valid only while every node checked has a first
    entry for it with a true status. Variants, parameters and API
    versions are reduced to the values common to the nodes checked.

    @param lu: calling logical unit
    @rtype: list
    @return: OS information objects, in the order given by
        L{_QueryBase._GetNames}

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # only online, VM-capable nodes are asked
    valid_nodes = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        valid_nodes.append(node.name)

    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for (idx, osl) in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          # what was collected so far is kept as is
          break

        # only the first entry of each node is looked at
        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
        else:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    wanted_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in wanted_names if name in data]
4801
4802
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  The actual work is delegated to an L{_OsQuery} object created in
  L{CheckArguments}.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: OS names, passed to C{qlang.MakeSimpleFilter}
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or C{None} if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      # Without a name filter only the status filter (possibly None) remains
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Creates the OS query object from the opcode parameters.

    """
    fields = self.op.output_fields
    qfilter = self._BuildFilter(fields, self.op.names)
    self.oq = _OsQuery(qfilter, fields, False)

  def ExpandNames(self):
    """Lets the query object expand names on behalf of this LU.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4845
4846
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary holding the name of the node to be removed under
        both C{OP_TARGET} and C{NODE_NAME}

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: tuple of (pre, post) node lists; both are the same list of
        all configured nodes minus the one being removed

    """
    remaining = self.cfg.GetNodeList()
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for (iname, inst) in self.cfg.GetAllInstancesInfo().items():
      if node.name not in inst.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % iname,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is dropped from the configuration first; afterwards the
    post hook is run for it, the node is told to leave the cluster and,
    if the cluster manages /etc/hosts, its entry is removed there.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    failure = leave_result.fail_msg
    if failure:
      # A failure on the remote side only yields a warning
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", failure)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4939
4940
class _NodeQuery(_QueryBase):
  """Query implementation for nodes, based on C{query.NODE_FIELDS}.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Declares the locks needed by the query on the given LU.

    Node locks are only requested when locking was asked for and live
    data (C{query.NQ_LIVE}) is among the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; no further locks are declared for node queries.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are
    gathered; for the others C{None} (or an empty dictionary for the
    node groups) is handed over instead.

    @return: a L{query.NodeQueryData} object

    """
    requested = self.requested_data
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # Nodes with a failed call or an empty payload get no live data
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        # Instances on nodes which are not part of the query are ignored
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())

    groups = {}
    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5015
5016
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Lets the query object expand names on behalf of this LU.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards the lock declaration for C{level} to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5036
5037
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all nodes.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  @staticmethod
  def _GetVolumeField(field, node, vol, vol2inst):
    """Returns the value of a single output field for one volume.

    @param field: name of the requested field
    @param node: name of the node the volume was reported by
    @param vol: volume dictionary as found in the RPC payload
    @param vol2inst: mapping from (node, "vg/name") to the owner
    @raise errors.ParameterError: if the field is not known

    """
    if field == "node":
      return node
    if field == "phys":
      return vol["dev"]
    if field == "vg":
      return vol["vg"]
    if field == "name":
      return vol["name"]
    if field == "size":
      return int(float(vol["size"]))
    if field == "instance":
      # Volumes not found in the mapping are shown with a dash
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are silently skipped, nodes whose RPC call failed are
    skipped with a warning. The volumes of each node are sorted by their
    "dev" value.

    @return: list of rows, one per volume; each row holds the values of
        the requested output fields converted to strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        failure)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          row.append(str(self._GetVolumeField(field, node, vol, vol2inst)))
        output.append(row)

    return output
5106
5107
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all nodes.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are silently skipped, nodes whose RPC call failed are
    skipped with a warning. Nodes and, within a node, storage unit names
    are ordered using C{utils.NiceSort}.

    @return: list of rows, one per storage unit, holding the values of
        the requested output fields

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    fields = self.op.output_fields[:]
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        failure)
        continue

      # Index the rows by storage unit name
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5189
5190
class _InstanceQuery(_QueryBase):
  """Query implementation for instances, based on C{query.INSTANCE_FIELDS}.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Declares the locks needed by the query on the given LU.

    Locks are only requested when locking was asked for and live data
    (C{query.IQ_LIVE}) is among the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Group and node locks are filled in by DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Declares the node group and node locks, if locking is in use.

    """
    if not self.do_locking:
      return

    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(lu.cfg.GetInstanceNodeGroups(instance_name))
      lu.needed_locks[locking.LEVEL_NODEGROUP] = group_uuids
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the owned group locks against the locked instances.

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are
    gathered; for the others C{None} (or an empty dictionary for the
    live data) is handed over instead.

    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    requested = self.requested_data
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*[inst.all_nodes
                                        for inst in instance_list]))
    hv_list = list(set(inst.hypervisor for inst in instance_list))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Gather data as requested
    if requested & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
          continue
        if not result.payload:
          # no instance is alive
          continue
        for inst in result.payload:
          if inst not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            inst, name)
          elif all_info[inst].primary_node == name:
            # Note that the whole payload of the node is merged here, not
            # only the entry of this instance
            live_data.update(result.payload)
          else:
            wrongnode_inst.add(inst)

    disk_usage = None
    if query.IQ_DISKUSAGE in requested:
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = _ComputeDiskSize(inst.disk_template,
                                                 disk_sizes)

    consinfo = None
    if query.IQ_CONSOLE in requested:
      consinfo = {}
      for inst in instance_list:
        if inst.name not in live_data:
          consinfo[inst.name] = None
          continue
        # Instance is running
        consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
      assert set(consinfo.keys()) == set(instance_names)

    if query.IQ_NODES in requested:
      node_names = set(itertools.chain(*[inst.all_nodes
                                         for inst in instance_list]))
      # From here on "nodes" maps node names to node objects
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      group_uuids = set(node.group for node in nodes.values())
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in group_uuids)
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5321
5322
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by C{self.op.what}; the matching query
  implementation does the actual work.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    op = self.op
    impl_class = _GetQueryImplementation(op.what)
    self.impl = impl_class(op.qfilter, op.fields, op.use_locking)

  def ExpandNames(self):
    """Lets the query implementation expand names on behalf of this LU.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards the lock declaration for C{level} to the implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5343
5344
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    what = self.op.what
    self.qcls = _GetQueryImplementation(what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the field definitions of the selected implementation.

    """
    known_fields = self.qcls.FIELDS
    return query.QueryFields(known_fields, self.op.fields)
5360
5361
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or if a field to be changed is not modifiable

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    stype = self.op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    # Anything requested beyond the modifiable fields is an error
    rejected = set(self.op.changes.keys()) - allowed
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (stype, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the affected node.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit on the node.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
5402
5403
5404 class LUNodeAdd(LogicalUnit):
5405   """Logical unit for adding node to the cluster.
5406
5407   """
5408   HPATH = "node-add"
5409   HTYPE = constants.HTYPE_NODE
5410   _NFLAGS = ["master_capable", "vm_capable"]
5411
5412   def CheckArguments(self):
5413     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5414     # validate/normalize the node name
5415     self.hostname = netutils.GetHostname(name=self.op.node_name,
5416                                          family=self.primary_ip_family)
5417     self.op.node_name = self.hostname.name
5418
5419     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5420       raise errors.OpPrereqError("Cannot readd the master node",
5421                                  errors.ECODE_STATE)
5422
5423     if self.op.readd and self.op.group:
5424       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5425                                  " being readded", errors.ECODE_INVAL)
5426
5427   def BuildHooksEnv(self):
5428     """Build hooks env.
5429
5430     This will run on all nodes before, and on all nodes + the new node after.
5431
5432     """
5433     return {
5434       "OP_TARGET": self.op.node_name,
5435       "NODE_NAME": self.op.node_name,
5436       "NODE_PIP": self.op.primary_ip,
5437       "NODE_SIP": self.op.secondary_ip,
5438       "MASTER_CAPABLE": str(self.op.master_capable),
5439       "VM_CAPABLE": str(self.op.vm_capable),
5440       }
5441
5442   def BuildHooksNodes(self):
5443     """Build hooks nodes.
5444
5445     """
5446     # Exclude added node
5447     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5448     post_nodes = pre_nodes + [self.op.node_name, ]
5449
5450     return (pre_nodes, post_nodes)
5451
5452   def CheckPrereq(self):
5453     """Check prerequisites.
5454
5455     This checks:
5456      - the new node is not already in the config
5457      - it is resolvable
5458      - its parameters (single/dual homed) matches the cluster
5459
5460     Any errors are signaled by raising errors.OpPrereqError.
5461
5462     """
5463     cfg = self.cfg
5464     hostname = self.hostname
5465     node = hostname.name
5466     primary_ip = self.op.primary_ip = hostname.ip
5467     if self.op.secondary_ip is None:
5468       if self.primary_ip_family == netutils.IP6Address.family:
5469         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5470                                    " IPv4 address must be given as secondary",
5471                                    errors.ECODE_INVAL)
5472       self.op.secondary_ip = primary_ip
5473
5474     secondary_ip = self.op.secondary_ip
5475     if not netutils.IP4Address.IsValid(secondary_ip):
5476       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5477                                  " address" % secondary_ip, errors.ECODE_INVAL)
5478
5479     node_list = cfg.GetNodeList()
5480     if not self.op.readd and node in node_list:
5481       raise errors.OpPrereqError("Node %s is already in the configuration" %
5482                                  node, errors.ECODE_EXISTS)
5483     elif self.op.readd and node not in node_list:
5484       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5485                                  errors.ECODE_NOENT)
5486
5487     self.changed_primary_ip = False
5488
5489     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5490       if self.op.readd and node == existing_node_name:
5491         if existing_node.secondary_ip != secondary_ip:
5492           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5493                                      " address configuration as before",
5494                                      errors.ECODE_INVAL)
5495         if existing_node.primary_ip != primary_ip:
5496           self.changed_primary_ip = True
5497
5498         continue
5499
5500       if (existing_node.primary_ip == primary_ip or
5501           existing_node.secondary_ip == primary_ip or
5502           existing_node.primary_ip == secondary_ip or
5503           existing_node.secondary_ip == secondary_ip):
5504         raise errors.OpPrereqError("New node ip address(es) conflict with"
5505                                    " existing node %s" % existing_node.name,
5506                                    errors.ECODE_NOTUNIQUE)
5507
5508     # After this 'if' block, None is no longer a valid value for the
5509     # _capable op attributes
5510     if self.op.readd:
5511       old_node = self.cfg.GetNodeInfo(node)
5512       assert old_node is not None, "Can't retrieve locked node %s" % node
5513       for attr in self._NFLAGS:
5514         if getattr(self.op, attr) is None:
5515           setattr(self.op, attr, getattr(old_node, attr))
5516     else:
5517       for attr in self._NFLAGS:
5518         if getattr(self.op, attr) is None:
5519           setattr(self.op, attr, True)
5520
5521     if self.op.readd and not self.op.vm_capable:
5522       pri, sec = cfg.GetNodeInstances(node)
5523       if pri or sec:
5524         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5525                                    " flag set to false, but it already holds"
5526                                    " instances" % node,
5527                                    errors.ECODE_STATE)
5528
5529     # check that the type of the node (single versus dual homed) is the
5530     # same as for the master
5531     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5532     master_singlehomed = myself.secondary_ip == myself.primary_ip
5533     newbie_singlehomed = secondary_ip == primary_ip
5534     if master_singlehomed != newbie_singlehomed:
5535       if master_singlehomed:
5536         raise errors.OpPrereqError("The master has no secondary ip but the"
5537                                    " new node has one",
5538                                    errors.ECODE_INVAL)
5539       else:
5540         raise errors.OpPrereqError("The master has a secondary ip but the"
5541                                    " new node doesn't have one",
5542                                    errors.ECODE_INVAL)
5543
5544     # checks reachability
5545     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5546       raise errors.OpPrereqError("Node not reachable by ping",
5547                                  errors.ECODE_ENVIRON)
5548
5549     if not newbie_singlehomed:
5550       # check reachability from my secondary ip to newbie's secondary ip
5551       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5552                            source=myself.secondary_ip):
5553         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5554                                    " based ping to node daemon port",
5555                                    errors.ECODE_ENVIRON)
5556
5557     if self.op.readd:
5558       exceptions = [node]
5559     else:
5560       exceptions = []
5561
5562     if self.op.master_capable:
5563       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5564     else:
5565       self.master_candidate = False
5566
5567     if self.op.readd:
5568       self.new_node = old_node
5569     else:
5570       node_group = cfg.LookupNodeGroup(self.op.group)
5571       self.new_node = objects.Node(name=node,
5572                                    primary_ip=primary_ip,
5573                                    secondary_ip=secondary_ip,
5574                                    master_candidate=self.master_candidate,
5575                                    offline=False, drained=False,
5576                                    group=node_group)
5577
5578     if self.op.ndparams:
5579       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5580
5581     if self.op.hv_state:
5582       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5583
5584     if self.op.disk_state:
5585       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5586
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    Completes the node object stored in C{self.new_node} (flags, node
    parameters, static hypervisor/disk state), checks that the node
    answers the version RPC with the master's protocol version, lets the
    master verify ssh/hostname access to the node and finally registers
    the node in the context (re-registering it and updating the
    configuration in the re-add case).

    @param feedback_fn: callable used to report each ssh/hostname
        verification failure; also passed on to the configuration update
    @raise errors.OpExecError: if the node's protocol version differs
        from the master's, or if the ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # node parameters are always (re)set: either to the requested values
    # or to an empty dictionary
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # the static states are only touched when explicitly requested
    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # only multi-homed nodes (secondary IP differs from the primary one)
    # need the secondary IP check
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # the verification is run by the master node only, and the only
    # check requested is the node list check against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # a non-empty payload means at least one check failed; report every
      # entry before aborting
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # a failed demotion is only reported, it does not abort the re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      # brand new node: it is passed explicitly to the file
      # redistribution as an additional node
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5689
5690
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode arguments and derives the locking needs.

    Expands the node name, rejects requests without any modification or
    with more than one value set to True, validates the format of the
    new secondary IP and computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}.

    @raise errors.OpPrereqError: on invalid argument combinations or an
        invalid secondary IP address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this count also covers master_capable and vm_capable,
    # so setting both of them to True in one request is rejected as well;
    # confirm whether that is intended
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only when a demotion may have to be compensated
    # by promoting another node; instances only for secondary IP changes
    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @param instance: the instance object to examine
    @return: whether the instance uses an internally mirrored disk
        template and has the target node among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declares the node, node resource and instance locks needed.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: dictionary with the target node name and the stringified
        requested flag values

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple containing the same node list (master node and
        target node) twice

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested changes are possible for the node in its
    current state (instance locks still valid, role changes allowed,
    enough master candidates left, power state, reachability of a new
    secondary IP) and computes the values used by L{Exec}:
    C{self.node}, C{self.old_flags}, C{self.old_role},
    C{self.new_role} and, when requested, C{self.new_ndparams},
    C{self.new_hv_state} and C{self.new_disk_state}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the values computed in L{CheckPrereq} to the node object,
    writes the node back through the configuration and, if the master
    candidate status changed, re-adds the node to the context.

    @param feedback_fn: passed on to the configuration update
    @return: list of (parameter name, new value) pairs describing the
        changes made

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Report only the flags whose value actually changes
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6026
6027
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not given

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = (self.op.node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC result

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
6058
6059
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dictionary with version information taken from the
        constants module and settings read from the cluster object;
        hypervisor parameters are restricted to enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, filtered just for enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Cluster-level hypervisor parameters, again only for enabled ones
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6130
6131
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Checks the requested output fields against the known field sets.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def _GetFieldValue(self, field):
    """Computes the value of a single output field.

    @param field: name of the requested field
    @return: the value for that field
    @raise errors.ParameterError: if the field name is not handled

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    if field == "volume_group_name":
      return self.cfg.GetVGName()
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Collect the values of the requested configuration fields.

    @return: list of values, in the same order as the requested
        output fields

    """
    return [self._GetFieldValue(name) for name in self.op.output_fields]
6169
6170
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; node locks are filled in later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Computes the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information returned by the disk assembly
    @raise errors.OpExecError: if the disk assembly reports a failure

    """
    (success, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if success:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
6208
6209
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors in the first
      (secondary-mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if any
      non-ignored assembly failed, and device_info is a list of
      (primary node, instance_visible_name, node_visible_name) tuples,
      one per disk, where node_visible_name is None if the assembly on
      the primary node did not succeed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy so that the size is not unset on the original
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the assembly on the primary node fails
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6296
6297
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the assembly fails, the disks are shut down again and an error is
  raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly;
      if it is false but not None, a hint about '--force' is logged
      on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  # Roll back whatever was activated before reporting the failure
  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6311
6312
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; node locks are filled in later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Computes the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force option the disks are shut down directly; otherwise
    the variant that first checks the instance state is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
6347
6348
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function first checks the instance state against INSTANCE_DOWN
  and only then calls _ShutdownInstanceDisks.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: passed through to _ShutdownInstanceDisks (defaults to
      None)

  """
  # the state check comes first, so the shutdown is not attempted unless
  # it passes
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6358
6359
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks; None selects all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk does not belong to
      the instance

  """
  # No explicit selection means "all disks of the instance"
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
6376
6377
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted on every node of the node tree of each
  selected disk. A failure is logged as a warning and does not stop the
  remaining shutdowns.

  Failures on the primary node count as errors unless C{ignore_primary}
  is true. Failures on any other node count as errors unless the RPC
  result is flagged as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down, C{None} meaning all disks of the
      instance
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node are ignored
  @rtype: boolean
  @return: C{False} if at least one failure counted as an error, C{True}
      otherwise

  """
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)

      # decide whether this failure affects the overall result
      if node == instance.primary_node:
        is_error = not ignore_primary
      else:
        is_error = not result.offline

      if is_error:
        success = False

  return success
6402
6403
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested amount of free memory.

  The node is queried for the information of the given hypervisor. If
  the query fails, the reported value is not an integer or the node has
  less free memory than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  # the payload is a triple whose last element holds exactly one entry,
  # the information for the hypervisor we asked about
  (_, _, (hv_info, )) = node_result.payload
  free_mem = hv_info.get("memory_free")

  if not isinstance(free_mem, int):
    err = ("Can't compute free memory on node %s, result"
           " was '%s'" % (node, free_mem))
    raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

  if requested > free_mem:
    err = ("Not enough memory on node %s for %s:"
           " needed %s MiB, available %s MiB" %
           (node, reason, requested, free_mem))
    raise errors.OpPrereqError(err, errors.ECODE_NORES)

  return free_mem
6444
6445
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the given VGs.

  Every volume group named in C{req_sizes} is checked on all given nodes
  through L{_CheckNodesFreeDiskOnVG}, which raises an OpPrereqError if a
  node has less free disk than required or cannot be queried.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_sizes[vg])
6467
6468
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  All nodes are queried with a single RPC call and then examined in the
  order given. The first node which cannot be queried, reports a
  non-integer amount of free space or has less free space than requested
  makes this function raise an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, [vg], None)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the payload is a triple whose middle element holds exactly one
    # entry, the information for the volume group we asked about
    (_, (vg_info, ), _) = node_result.payload
    vg_free = vg_info.get("vg_free")

    if not isinstance(vg_free, int):
      err = ("Can't compute free disk space on node"
             " %s for vg %s, result was '%s'" %
             (node, vg, vg_free))
      raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

    if requested > vg_free:
      err = ("Not enough disk space on target node %s"
             " vg %s: required %d MiB, available %d MiB" %
             (node, vg, requested, vg_free))
      raise errors.OpPrereqError(err, errors.ECODE_NORES)
6505
6506
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  All nodes are queried with a single RPC call and then examined in the
  order given. The first node which cannot be queried, reports a
  non-integer CPU count or has fewer CPUs than requested makes this
  function raise an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the payload is a triple whose last element holds exactly one entry,
    # the information for the hypervisor we asked about
    (_, _, (hv_info, )) = node_result.payload
    num_cpus = hv_info.get("cpu_total")

    if not isinstance(num_cpus, int):
      err = ("Can't compute the number of physical CPUs"
             " on node %s, result was '%s'" %
             (node, num_cpus))
      raise errors.OpPrereqError(err, errors.ECODE_ENVIRON)

    if requested > num_cpus:
      err = ("Node %s has %s physical CPUs, but %s are "
             "required" % (node, num_cpus, requested))
      raise errors.OpPrereqError(err, errors.ECODE_NORES)
6540
6541
class LUInstanceStartup(LogicalUnit):
  """Logical unit that starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the backend parameter overrides, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return

    # both helpers work on the very dict stored in the opcode
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and mark the node resource locks for replacement.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node resource locks, restricted to the primary node.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    The C{FORCE} flag of the opcode is exported in addition to the
    environment describing the instance.

    """
    env = {}
    env["FORCE"] = self.op.force
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance, used
        both as first and second element of the result

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies the optional hypervisor parameter overrides and the
    instance state. Unless an offline primary node is being ignored, it
    also checks that the primary node is online, that the bridges of the
    instance exist and, if the instance is not running yet, that the
    primary node has enough free memory.

    """
    op = self.op
    instance = self.cfg.GetInstanceInfo(op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % op.instance_name

    if op.hvparams:
      # check the overridden hypervisor parameters: first the syntax
      # (locally), then through _CheckHVParams with the instance's nodes
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(op.hvparams, constants.HVS_PARAMETER_TYPES)
      hvp = cluster.FillHV(instance)
      hvp.update(op.hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if op.hvparams or op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      # nothing can be verified on an offline node
      return

    _CheckNodeOnline(self, pnode)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # running already, no additional memory is needed
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set the instance is marked as up in the
    configuration first. With an offline primary node nothing else is
    done; otherwise the disks are started and the instance is started on
    its primary node. If starting the instance fails, the disks are shut
    down again.

    @param feedback_fn: feedback function, not used here
    @raise errors.OpExecError: if the instance could not be started

    """
    op = self.op
    instance = self.instance

    if not op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node
    _StartInstanceDisks(self, instance, op.force)

    start_args = (instance, op.hvparams, op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          op.startup_paused)
    failure = result.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % failure)
6662
6663
class LUInstanceReboot(LogicalUnit):
  """Logical unit that reboots an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The reboot options of the opcode are exported in addition to the
    environment describing the instance.

    """
    env = {}
    env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    env["REBOOT_TYPE"] = self.op.reboot_type
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance, used
        both as first and second element of the result

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, then checks its state, that its primary node
    is online and that its bridges exist.

    """
    name = self.op.instance_name
    instance = self.cfg.GetInstanceInfo(name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the instance reboot RPC call. In all other cases the instance is
    shut down (if it is running) together with its disks, and then the
    disks and the instance are started again. Finally the instance is
    marked as up in the configuration.

    @param feedback_fn: feedback function, not used here
    @raise errors.OpExecError: if the instance could not be started
        during a full reboot

    """
    instance = self.instance
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    is_running = bool(remote_info.payload)

    if is_running and self.op.reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                              constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance,
                                             self.op.reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: stop whatever is running, then start from scratch
      if is_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      failure = result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)

    self.cfg.MarkInstanceUp(instance.name)
6756
6757
class LUInstanceShutdown(LogicalUnit):
  """Logical unit that shuts down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The shutdown timeout of the opcode is exported as C{TIMEOUT} in
    addition to the environment describing the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance, used
        both as first and second element of the result

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance and checks its state. The primary node has to
    be online, unless it is offline and the opcode asks for offline nodes
    to be ignored.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % name

    instance = self.instance
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set the instance is marked as down in the
    configuration first. With an offline primary node nothing else is
    done. Otherwise the instance is shut down on its primary node, where
    a failure is only logged as a warning, and its disks are shut down
    afterwards.

    @param feedback_fn: feedback function, not used here

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    # the disks are shut down even if stopping the instance failed
    _ShutdownInstanceDisks(self, instance)
6827
6828
class LUInstanceReinstall(LogicalUnit):
  """Logical unit that reinstalls the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the environment describing the instance is exported.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance, used
        both as first and second element of the result

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    All nodes of the instance have to be online, the instance must have
    disks and has to pass the C{INSTANCE_DOWN} state check. A new OS, if
    requested, is checked on the primary node and new OS parameters are
    checked on all nodes of the instance.

    Sets C{self.os_inst} to the updated OS parameters (without defaults)
    or to C{None} if no OS parameters were given.

    """
    op = self.op
    instance = self.cfg.GetInstanceInfo(op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % op.instance_name

    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 op.instance_name,
                                 errors.ECODE_INVAL)

    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if op.os_type is None:
      # keep the currently configured OS
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, op.os_type, op.force_variant)
      instance_os = op.os_type

    nodelist = list(instance.all_nodes)

    if op.osparams:
      new_osparams = _GetUpdatedParams(instance.osparams, op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      # the new dict (without defaults)
      self.os_inst = new_osparams
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS is written to the configuration first. Then the disks
    are started, the OS create scripts are run on the primary node and
    the disks are shut down again, no matter whether the scripts
    succeeded.

    @param feedback_fn: function used to report progress to the user

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6918
6919
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  Optionally the size and mode of the recreated disks can be overridden
  (C{op.disks}) and the instance can be placed on a new set of nodes
  (C{op.nodes}).

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be overridden while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  # (this runs when the class body is executed and fails as soon as the
  # set of known disk parameters differs from the ones handled here)
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    """Normalize and validate the list of disks to recreate.

    After normalization C{op.disks} is a list of C{(index, parameters)}
    pairs. Duplicate indices and parameters outside of L{_MODIFYABLE} are
    rejected.

    @raise errors.OpPrereqError: if a disk was specified more than once
        or an unmodifiable parameter was given

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      # anything not explicitly allowed must not be changed
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and prepare the node lock levels.

    Explicitly given replacement nodes are expanded and requested right
    away; the node locks are recalculated in append mode. The node
    resource lock list starts out empty and is filled in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level currently being processed

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance, used
        both as first and second element of the result

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    It also verifies the number of replacement nodes, that the (new)
    primary node is online, that the instance has disks and that the
    requested disk indices exist. The disks to recreate are stored in
    C{self.disks}, a dict mapping the disk index to the parameters to
    change.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      # sanity checks only, the node count was compared with the
      # instance's current nodes above
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      # the first replacement node becomes the new primary node
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # no explicit selection: recreate all disks without changes
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    # NOTE(review): max() raises ValueError if self.disks is empty, i.e. if
    # no disks were selected and the instance has an empty disk list --
    # confirm this cannot happen for non-diskless instances
    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # NOTE(review): comparing a list with range() relies on range()
    # returning a list (Python 2)
    if (self.op.nodes and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    The needed modifications are collected first and applied to the disk
    objects afterwards, then the selected disks are created while the
    disks not selected are skipped.

    @param feedback_fn: feedback function, passed to the configuration
        update

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        # port and secret are kept, nodes and minors are replaced
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    # NOTE(review): the configuration is only updated here when the nodes
    # change; size/mode changes applied above without new nodes are not
    # explicitly written by this method -- confirm whether that is intended
    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
7111
7112
7113 class LUInstanceRename(LogicalUnit):
7114   """Rename an instance.
7115
7116   """
7117   HPATH = "instance-rename"
7118   HTYPE = constants.HTYPE_INSTANCE
7119
7120   def CheckArguments(self):
7121     """Check arguments.
7122
7123     """
7124     if self.op.ip_check and not self.op.name_check:
7125       # TODO: make the ip check more flexible and not depend on the name check
7126       raise errors.OpPrereqError("IP address check requires a name check",
7127                                  errors.ECODE_INVAL)
7128
7129   def BuildHooksEnv(self):
7130     """Build hooks env.
7131
7132     This runs on master, primary and secondary nodes of the instance.
7133
7134     """
7135     env = _BuildInstanceHookEnvByObject(self, self.instance)
7136     env["INSTANCE_NEW_NAME"] = self.op.new_name
7137     return env
7138
7139   def BuildHooksNodes(self):
7140     """Build hooks nodes.
7141
7142     """
7143     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7144     return (nl, nl)
7145
7146   def CheckPrereq(self):
7147     """Check prerequisites.
7148
7149     This checks that the instance is in the cluster and is not running.
7150
7151     """
7152     self.op.instance_name = _ExpandInstanceName(self.cfg,
7153                                                 self.op.instance_name)
7154     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7155     assert instance is not None
7156     _CheckNodeOnline(self, instance.primary_node)
7157     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7158                         msg="cannot rename")
7159     self.instance = instance
7160
7161     new_name = self.op.new_name
7162     if self.op.name_check:
7163       hostname = netutils.GetHostname(name=new_name)
7164       if hostname.name != new_name:
7165         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7166                      hostname.name)
7167       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7168         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7169                                     " same as given hostname '%s'") %
7170                                     (hostname.name, self.op.new_name),
7171                                     errors.ECODE_INVAL)
7172       new_name = self.op.new_name = hostname.name
7173       if (self.op.ip_check and
7174           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7175         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7176                                    (hostname.ip, new_name),
7177                                    errors.ECODE_NOTUNIQUE)
7178
7179     instance_list = self.cfg.GetInstanceList()
7180     if new_name in instance_list and new_name != instance.name:
7181       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7182                                  new_name, errors.ECODE_EXISTS)
7183
7184   def Exec(self, feedback_fn):
7185     """Rename the instance.
7186
7187     """
7188     inst = self.instance
7189     old_name = inst.name
7190
7191     rename_file_storage = False
7192     if (inst.disk_template in constants.DTS_FILEBASED and
7193         self.op.new_name != inst.name):
7194       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7195       rename_file_storage = True
7196
7197     self.cfg.RenameInstance(inst.name, self.op.new_name)
7198     # Change the instance lock. This is definitely safe while we hold the BGL.
7199     # Otherwise the new lock would have to be added in acquired mode.
7200     assert self.REQ_BGL
7201     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7202     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7203
7204     # re-read the instance from the configuration after rename
7205     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7206
7207     if rename_file_storage:
7208       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7209       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7210                                                      old_file_storage_dir,
7211                                                      new_file_storage_dir)
7212       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7213                    " (but the instance has been renamed in Ganeti)" %
7214                    (inst.primary_node, old_file_storage_dir,
7215                     new_file_storage_dir))
7216
7217     _StartInstanceDisks(self, inst, None)
7218     try:
7219       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7220                                                  old_name, self.op.debug_level)
7221       msg = result.fail_msg
7222       if msg:
7223         msg = ("Could not run OS rename script for instance %s on node %s"
7224                " (but the instance has been renamed in Ganeti): %s" %
7225                (inst.name, inst.primary_node, msg))
7226         self.proc.LogWarning(msg)
7227     finally:
7228       _ShutdownInstanceDisks(self, inst)
7229
7230     return inst.name
7231
7232
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  The instance is shut down on its primary node and then handed over to
  L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node-level locks are filled in later.

    """
    self._ExpandAndLockInstance()
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node and node resource locks of the instance.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The common per-instance variables are extended with
    C{SHUTDOWN_TIMEOUT}.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hooks run
        on the master only, the post-hooks on all nodes of the instance
        followed by the master

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    @raise errors.OpExecError: if the instance can't be shut down and
        failures are not ignored

    """
    inst = self.instance
    primary = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, primary)

    shutdown = self.rpc.call_instance_shutdown(primary, inst,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, primary, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7309
7310
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  The disks are removed first, then the instance is dropped from the
  configuration and its lock is scheduled for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal is only reported
      via C{feedback_fn} instead of aborting
  @raise errors.OpExecError: if the disks can't be removed and failures
      are not ignored

  """
  inst_name = instance.name
  logging.info("Removing block devices for instance %s", inst_name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", inst_name)
  lu.cfg.RemoveInstance(inst_name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = inst_name
7331
7332
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object built from the
  opcode parameters.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the requested names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_obj = self.iq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query_obj = self.iq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_obj = self.iq
    return query_obj.OldStyleQuery(self)
7352
7353
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet created with
  C{failover=True}; this LU declares the locks and builds the hooks
  environment.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Opcode attributes which are missing are treated as C{None}.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Lock the instance and create the failover tasklet.

    The node and node resource locks start out empty and are filled in
    by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the nodes of the instance are locked. The node
    resource locks mirror the node locks.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. locking.ALL_SET is a marker and not a list of
      # names, hence it must be passed on as is instead of being sliced
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the common per-instance variables the environment describes
    the old and new primary/secondary nodes; the secondary variables are
    empty strings unless the disk template is internally mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hooks run
        on the master and the secondary nodes, the post-hooks additionally
        on the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7441
7442
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is done by a
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    The node and node resource locks start out empty and are filled in
    by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # The node resource level has to be declared as well, otherwise the
    # LEVEL_NODE_RES branch of DeclareLocks is never reached
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the nodes of the instance are locked. The node
    resource locks mirror the node locks.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. locking.ALL_SET is a marker and not a list of
      # names, hence it must be passed on as is instead of being sliced
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the common per-instance variables the environment describes
    the migration mode and the old and new primary/secondary nodes; the
    secondary variables are C{None} unless the disk template is
    internally mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hooks run
        on the master and the secondary nodes, the post-hooks additionally
        on the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7525
7526
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, new disks are created on the target node,
  the data is copied over and the instance is started on the target node
  if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the target node.

    The primary node of the instance is appended to the node locks in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The common per-instance variables are extended with C{TARGET_NODE}
    and C{SHUTDOWN_TIMEOUT}.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both entries are
        the same list, made of the master, the primary node of the
        instance and the target node

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are plain logical
    volumes or files and that the target node is able to host it.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    cluster = self.cfg.GetClusterInfo()
    bep = cluster.FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(cluster,
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the instance can't be shut down (unless
        consistency is ignored), if the disks can't be created or copied,
        or if the instance can't be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      # The first failure aborts the copy; drop the half-populated disks
      # on the target node and leave the instance on the source node
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7722
7723
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  No migration is done here; one L{opcodes.OpInstanceMigrate} job is
  returned for every instance having the node as primary.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """No argument checks are needed.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and lock the node in shared mode.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node only.

    """
    hooks_env = {}
    hooks_env["NODE_NAME"] = self.op.node_name
    hooks_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both entries are
        the same list containing only the master node

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """No prerequisites are checked.

    """
    pass

  def Exec(self, feedback_fn):
    """Prepare one migration job per primary instance of the node.

    @return: a L{ResultWithJobs} wrapping the list of jobs, each made of
        a single L{opcodes.OpInstanceMigrate}

    """
    # Prepare jobs for migration instances
    runtime_changes = self.op.allow_runtime_changes
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(
        instance_name=inst.name,
        mode=self.op.mode,
        live=self.op.live,
        iallocator=self.op.iallocator,
        target_node=self.op.target_node,
        allow_runtime_changes=runtime_changes,
        ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7787
7788
7789 class TLMigrateInstance(Tasklet):
7790   """Tasklet class for instance migration.
7791
7792   @type live: boolean
7793   @ivar live: whether the migration will be done live or non-live;
7794       this variable is initialized only after CheckPrereq has run
7795   @type cleanup: boolean
7796   @ivar cleanup: Whether we clean up from a failed migration
7797   @type iallocator: string
7798   @ivar iallocator: The iallocator used to determine target_node
7799   @type target_node: string
7800   @ivar target_node: If given, the target_node to reallocate the instance to
7801   @type failover: boolean
7802   @ivar failover: Whether operation results in failover or migration
7803   @type fallback: boolean
7804   @ivar fallback: Whether fallback to failover is allowed if migration not
7805                   possible
7806   @type ignore_consistency: boolean
7807   @ivar ignore_consistency: Whether we should ignore consistency between source
7808                             and target node
7809   @type shutdown_timeout: int
7810   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7811   @type ignore_ipolicy: bool
7812   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7813
7814   """
7815
7816   # Constants
7817   _MIGRATION_POLL_INTERVAL = 1      # seconds
7818   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7819
7820   def __init__(self, lu, instance_name, cleanup=False,
7821                failover=False, fallback=False,
7822                ignore_consistency=False,
7823                allow_runtime_changes=True,
7824                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7825                ignore_ipolicy=False):
7826     """Initializes this class.
7827
7828     """
7829     Tasklet.__init__(self, lu)
7830
7831     # Parameters
7832     self.instance_name = instance_name
7833     self.cleanup = cleanup
7834     self.live = False # will be overridden later
7835     self.failover = failover
7836     self.fallback = fallback
7837     self.ignore_consistency = ignore_consistency
7838     self.shutdown_timeout = shutdown_timeout
7839     self.ignore_ipolicy = ignore_ipolicy
7840     self.allow_runtime_changes = allow_runtime_changes
7841
7842   def CheckPrereq(self):
7843     """Check prerequisites.
7844
7845     This checks that the instance is in the cluster.
7846
7847     """
7848     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7849     instance = self.cfg.GetInstanceInfo(instance_name)
7850     assert instance is not None
7851     self.instance = instance
7852     cluster = self.cfg.GetClusterInfo()
7853
7854     if (not self.cleanup and
7855         not instance.admin_state == constants.ADMINST_UP and
7856         not self.failover and self.fallback):
7857       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7858                       " switching to failover")
7859       self.failover = True
7860
7861     if instance.disk_template not in constants.DTS_MIRRORED:
7862       if self.failover:
7863         text = "failovers"
7864       else:
7865         text = "migrations"
7866       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7867                                  " %s" % (instance.disk_template, text),
7868                                  errors.ECODE_STATE)
7869
7870     if instance.disk_template in constants.DTS_EXT_MIRROR:
7871       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7872
7873       if self.lu.op.iallocator:
7874         self._RunAllocator()
7875       else:
7876         # We set set self.target_node as it is required by
7877         # BuildHooksEnv
7878         self.target_node = self.lu.op.target_node
7879
7880       # Check that the target node is correct in terms of instance policy
7881       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7882       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7883       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7884       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7885                               ignore=self.ignore_ipolicy)
7886
7887       # self.target_node is already populated, either directly or by the
7888       # iallocator run
7889       target_node = self.target_node
7890       if self.target_node == instance.primary_node:
7891         raise errors.OpPrereqError("Cannot migrate instance %s"
7892                                    " to its primary (%s)" %
7893                                    (instance.name, instance.primary_node))
7894
7895       if len(self.lu.tasklets) == 1:
7896         # It is safe to release locks only when we're the only tasklet
7897         # in the LU
7898         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7899                       keep=[instance.primary_node, self.target_node])
7900
7901     else:
7902       secondary_nodes = instance.secondary_nodes
7903       if not secondary_nodes:
7904         raise errors.ConfigurationError("No secondary node but using"
7905                                         " %s disk template" %
7906                                         instance.disk_template)
7907       target_node = secondary_nodes[0]
7908       if self.lu.op.iallocator or (self.lu.op.target_node and
7909                                    self.lu.op.target_node != target_node):
7910         if self.failover:
7911           text = "failed over"
7912         else:
7913           text = "migrated"
7914         raise errors.OpPrereqError("Instances with disk template %s cannot"
7915                                    " be %s to arbitrary nodes"
7916                                    " (neither an iallocator nor a target"
7917                                    " node can be passed)" %
7918                                    (instance.disk_template, text),
7919                                    errors.ECODE_INVAL)
7920       nodeinfo = self.cfg.GetNodeInfo(target_node)
7921       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7922       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7923       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7924                               ignore=self.ignore_ipolicy)
7925
7926     i_be = cluster.FillBE(instance)
7927
7928     # check memory requirements on the secondary node
7929     if (not self.cleanup and
7930          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7931       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7932                                                "migrating instance %s" %
7933                                                instance.name,
7934                                                i_be[constants.BE_MINMEM],
7935                                                instance.hypervisor)
7936     else:
7937       self.lu.LogInfo("Not checking memory on the secondary node as"
7938                       " instance will not be started")
7939
7940     # check if failover must be forced instead of migration
7941     if (not self.cleanup and not self.failover and
7942         i_be[constants.BE_ALWAYS_FAILOVER]):
7943       if self.fallback:
7944         self.lu.LogInfo("Instance configured to always failover; fallback"
7945                         " to failover")
7946         self.failover = True
7947       else:
7948         raise errors.OpPrereqError("This instance has been configured to"
7949                                    " always failover, please allow failover",
7950                                    errors.ECODE_STATE)
7951
7952     # check bridge existance
7953     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7954
7955     if not self.cleanup:
7956       _CheckNodeNotDrained(self.lu, target_node)
7957       if not self.failover:
7958         result = self.rpc.call_instance_migratable(instance.primary_node,
7959                                                    instance)
7960         if result.fail_msg and self.fallback:
7961           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7962                           " failover")
7963           self.failover = True
7964         else:
7965           result.Raise("Can't migrate, please use failover",
7966                        prereq=True, ecode=errors.ECODE_STATE)
7967
7968     assert not (self.failover and self.cleanup)
7969
7970     if not self.failover:
7971       if self.lu.op.live is not None and self.lu.op.mode is not None:
7972         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7973                                    " parameters are accepted",
7974                                    errors.ECODE_INVAL)
7975       if self.lu.op.live is not None:
7976         if self.lu.op.live:
7977           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7978         else:
7979           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7980         # reset the 'live' parameter to None so that repeated
7981         # invocations of CheckPrereq do not raise an exception
7982         self.lu.op.live = None
7983       elif self.lu.op.mode is None:
7984         # read the default value from the hypervisor
7985         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7986         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7987
7988       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7989     else:
7990       # Failover is never live
7991       self.live = False
7992
7993     if not (self.failover or self.cleanup):
7994       remote_info = self.rpc.call_instance_info(instance.primary_node,
7995                                                 instance.name,
7996                                                 instance.hypervisor)
7997       remote_info.Raise("Error checking instance on node %s" %
7998                         instance.primary_node)
7999       instance_running = bool(remote_info.payload)
8000       if instance_running:
8001         self.current_mem = int(remote_info.payload["memory"])
8002
8003   def _RunAllocator(self):
8004     """Run the allocator based on input opcode.
8005
8006     """
8007     # FIXME: add a self.ignore_ipolicy option
8008     ial = IAllocator(self.cfg, self.rpc,
8009                      mode=constants.IALLOCATOR_MODE_RELOC,
8010                      name=self.instance_name,
8011                      # TODO See why hail breaks with a single node below
8012                      relocate_from=[self.instance.primary_node,
8013                                     self.instance.primary_node],
8014                      )
8015
8016     ial.Run(self.lu.op.iallocator)
8017
8018     if not ial.success:
8019       raise errors.OpPrereqError("Can't compute nodes using"
8020                                  " iallocator '%s': %s" %
8021                                  (self.lu.op.iallocator, ial.info),
8022                                  errors.ECODE_NORES)
8023     if len(ial.result) != ial.required_nodes:
8024       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8025                                  " of nodes (%s), required %s" %
8026                                  (self.lu.op.iallocator, len(ial.result),
8027                                   ial.required_nodes), errors.ECODE_FAULT)
8028     self.target_node = ial.result[0]
8029     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8030                  self.instance_name, self.lu.op.iallocator,
8031                  utils.CommaJoin(ial.result))
8032
8033   def _WaitUntilSync(self):
8034     """Poll with custom rpc for disk sync.
8035
8036     This uses our own step-based rpc call.
8037
8038     """
8039     self.feedback_fn("* wait until resync is done")
8040     all_done = False
8041     while not all_done:
8042       all_done = True
8043       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8044                                             self.nodes_ip,
8045                                             self.instance.disks)
8046       min_percent = 100
8047       for node, nres in result.items():
8048         nres.Raise("Cannot resync disks on node %s" % node)
8049         node_done, node_percent = nres.payload
8050         all_done = all_done and node_done
8051         if node_percent is not None:
8052           min_percent = min(min_percent, node_percent)
8053       if not all_done:
8054         if min_percent < 100:
8055           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8056         time.sleep(2)
8057
8058   def _EnsureSecondary(self, node):
8059     """Demote a node to secondary.
8060
8061     """
8062     self.feedback_fn("* switching node %s to secondary mode" % node)
8063
8064     for dev in self.instance.disks:
8065       self.cfg.SetDiskID(dev, node)
8066
8067     result = self.rpc.call_blockdev_close(node, self.instance.name,
8068                                           self.instance.disks)
8069     result.Raise("Cannot change disk to secondary on node %s" % node)
8070
8071   def _GoStandalone(self):
8072     """Disconnect from the network.
8073
8074     """
8075     self.feedback_fn("* changing into standalone mode")
8076     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8077                                                self.instance.disks)
8078     for node, nres in result.items():
8079       nres.Raise("Cannot disconnect disks node %s" % node)
8080
8081   def _GoReconnect(self, multimaster):
8082     """Reconnect to the network.
8083
8084     """
8085     if multimaster:
8086       msg = "dual-master"
8087     else:
8088       msg = "single-master"
8089     self.feedback_fn("* changing disks into %s mode" % msg)
8090     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8091                                            self.instance.disks,
8092                                            self.instance.name, multimaster)
8093     for node, nres in result.items():
8094       nres.Raise("Cannot change disks config on node %s" % node)
8095
8096   def _ExecCleanup(self):
8097     """Try to cleanup after a failed migration.
8098
8099     The cleanup is done by:
8100       - check that the instance is running only on one node
8101         (and update the config if needed)
8102       - change disks on its secondary node to secondary
8103       - wait until disks are fully synchronized
8104       - disconnect from the network
8105       - change disks into single-master mode
8106       - wait again until disks are fully synchronized
8107
8108     """
8109     instance = self.instance
8110     target_node = self.target_node
8111     source_node = self.source_node
8112
8113     # check running on only one node
8114     self.feedback_fn("* checking where the instance actually runs"
8115                      " (if this hangs, the hypervisor might be in"
8116                      " a bad state)")
8117     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8118     for node, result in ins_l.items():
8119       result.Raise("Can't contact node %s" % node)
8120
8121     runningon_source = instance.name in ins_l[source_node].payload
8122     runningon_target = instance.name in ins_l[target_node].payload
8123
8124     if runningon_source and runningon_target:
8125       raise errors.OpExecError("Instance seems to be running on two nodes,"
8126                                " or the hypervisor is confused; you will have"
8127                                " to ensure manually that it runs only on one"
8128                                " and restart this operation")
8129
8130     if not (runningon_source or runningon_target):
8131       raise errors.OpExecError("Instance does not seem to be running at all;"
8132                                " in this case it's safer to repair by"
8133                                " running 'gnt-instance stop' to ensure disk"
8134                                " shutdown, and then restarting it")
8135
8136     if runningon_target:
8137       # the migration has actually succeeded, we need to update the config
8138       self.feedback_fn("* instance running on secondary node (%s),"
8139                        " updating config" % target_node)
8140       instance.primary_node = target_node
8141       self.cfg.Update(instance, self.feedback_fn)
8142       demoted_node = source_node
8143     else:
8144       self.feedback_fn("* instance confirmed to be running on its"
8145                        " primary node (%s)" % source_node)
8146       demoted_node = target_node
8147
8148     if instance.disk_template in constants.DTS_INT_MIRROR:
8149       self._EnsureSecondary(demoted_node)
8150       try:
8151         self._WaitUntilSync()
8152       except errors.OpExecError:
8153         # we ignore here errors, since if the device is standalone, it
8154         # won't be able to sync
8155         pass
8156       self._GoStandalone()
8157       self._GoReconnect(False)
8158       self._WaitUntilSync()
8159
8160     self.feedback_fn("* done")
8161
8162   def _RevertDiskStatus(self):
8163     """Try to revert the disk status after a failed migration.
8164
8165     """
8166     target_node = self.target_node
8167     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8168       return
8169
8170     try:
8171       self._EnsureSecondary(target_node)
8172       self._GoStandalone()
8173       self._GoReconnect(False)
8174       self._WaitUntilSync()
8175     except errors.OpExecError, err:
8176       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8177                          " please try to recover the instance manually;"
8178                          " error '%s'" % str(err))
8179
8180   def _AbortMigration(self):
8181     """Call the hypervisor code to abort a started migration.
8182
8183     """
8184     instance = self.instance
8185     target_node = self.target_node
8186     source_node = self.source_node
8187     migration_info = self.migration_info
8188
8189     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8190                                                                  instance,
8191                                                                  migration_info,
8192                                                                  False)
8193     abort_msg = abort_result.fail_msg
8194     if abort_msg:
8195       logging.error("Aborting migration failed on target node %s: %s",
8196                     target_node, abort_msg)
8197       # Don't raise an exception here, as we stil have to try to revert the
8198       # disk status, even if this step failed.
8199
8200     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8201         instance, False, self.live)
8202     abort_msg = abort_result.fail_msg
8203     if abort_msg:
8204       logging.error("Aborting migration failed on source node %s: %s",
8205                     source_node, abort_msg)
8206
8207   def _ExecMigration(self):
8208     """Migrate an instance.
8209
8210     The migrate is done by:
8211       - change the disks into dual-master mode
8212       - wait until disks are fully synchronized again
8213       - migrate the instance
8214       - change disks on the new secondary node (the old primary) to secondary
8215       - wait until disks are fully synchronized
8216       - change disks into single-master mode
8217
8218     """
8219     instance = self.instance
8220     target_node = self.target_node
8221     source_node = self.source_node
8222
8223     # Check for hypervisor version mismatch and warn the user.
8224     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8225                                        None, [self.instance.hypervisor])
8226     for ninfo in nodeinfo.values():
8227       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8228                   ninfo.node)
8229     (_, _, (src_info, )) = nodeinfo[source_node].payload
8230     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8231
8232     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8233         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8234       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8235       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8236       if src_version != dst_version:
8237         self.feedback_fn("* warning: hypervisor version mismatch between"
8238                          " source (%s) and target (%s) node" %
8239                          (src_version, dst_version))
8240
8241     self.feedback_fn("* checking disk consistency between source and target")
8242     for (idx, dev) in enumerate(instance.disks):
8243       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8244         raise errors.OpExecError("Disk %s is degraded or not fully"
8245                                  " synchronized on target node,"
8246                                  " aborting migration" % idx)
8247
8248     if self.current_mem > self.tgt_free_mem:
8249       if not self.allow_runtime_changes:
8250         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8251                                  " free memory to fit instance %s on target"
8252                                  " node %s (have %dMB, need %dMB)" %
8253                                  (instance.name, target_node,
8254                                   self.tgt_free_mem, self.current_mem))
8255       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8256       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8257                                                      instance,
8258                                                      self.tgt_free_mem)
8259       rpcres.Raise("Cannot modify instance runtime memory")
8260
8261     # First get the migration information from the remote node
8262     result = self.rpc.call_migration_info(source_node, instance)
8263     msg = result.fail_msg
8264     if msg:
8265       log_err = ("Failed fetching source migration information from %s: %s" %
8266                  (source_node, msg))
8267       logging.error(log_err)
8268       raise errors.OpExecError(log_err)
8269
8270     self.migration_info = migration_info = result.payload
8271
8272     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8273       # Then switch the disks to master/master mode
8274       self._EnsureSecondary(target_node)
8275       self._GoStandalone()
8276       self._GoReconnect(True)
8277       self._WaitUntilSync()
8278
8279     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8280     result = self.rpc.call_accept_instance(target_node,
8281                                            instance,
8282                                            migration_info,
8283                                            self.nodes_ip[target_node])
8284
8285     msg = result.fail_msg
8286     if msg:
8287       logging.error("Instance pre-migration failed, trying to revert"
8288                     " disk status: %s", msg)
8289       self.feedback_fn("Pre-migration failed, aborting")
8290       self._AbortMigration()
8291       self._RevertDiskStatus()
8292       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8293                                (instance.name, msg))
8294
8295     self.feedback_fn("* migrating instance to %s" % target_node)
8296     result = self.rpc.call_instance_migrate(source_node, instance,
8297                                             self.nodes_ip[target_node],
8298                                             self.live)
8299     msg = result.fail_msg
8300     if msg:
8301       logging.error("Instance migration failed, trying to revert"
8302                     " disk status: %s", msg)
8303       self.feedback_fn("Migration failed, aborting")
8304       self._AbortMigration()
8305       self._RevertDiskStatus()
8306       raise errors.OpExecError("Could not migrate instance %s: %s" %
8307                                (instance.name, msg))
8308
8309     self.feedback_fn("* starting memory transfer")
8310     last_feedback = time.time()
8311     while True:
8312       result = self.rpc.call_instance_get_migration_status(source_node,
8313                                                            instance)
8314       msg = result.fail_msg
8315       ms = result.payload   # MigrationStatus instance
8316       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8317         logging.error("Instance migration failed, trying to revert"
8318                       " disk status: %s", msg)
8319         self.feedback_fn("Migration failed, aborting")
8320         self._AbortMigration()
8321         self._RevertDiskStatus()
8322         raise errors.OpExecError("Could not migrate instance %s: %s" %
8323                                  (instance.name, msg))
8324
8325       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8326         self.feedback_fn("* memory transfer complete")
8327         break
8328
8329       if (utils.TimeoutExpired(last_feedback,
8330                                self._MIGRATION_FEEDBACK_INTERVAL) and
8331           ms.transferred_ram is not None):
8332         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8333         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8334         last_feedback = time.time()
8335
8336       time.sleep(self._MIGRATION_POLL_INTERVAL)
8337
8338     result = self.rpc.call_instance_finalize_migration_src(source_node,
8339                                                            instance,
8340                                                            True,
8341                                                            self.live)
8342     msg = result.fail_msg
8343     if msg:
8344       logging.error("Instance migration succeeded, but finalization failed"
8345                     " on the source node: %s", msg)
8346       raise errors.OpExecError("Could not finalize instance migration: %s" %
8347                                msg)
8348
8349     instance.primary_node = target_node
8350
8351     # distribute new instance config to the other nodes
8352     self.cfg.Update(instance, self.feedback_fn)
8353
8354     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8355                                                            instance,
8356                                                            migration_info,
8357                                                            True)
8358     msg = result.fail_msg
8359     if msg:
8360       logging.error("Instance migration succeeded, but finalization failed"
8361                     " on the target node: %s", msg)
8362       raise errors.OpExecError("Could not finalize instance migration: %s" %
8363                                msg)
8364
8365     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8366       self._EnsureSecondary(source_node)
8367       self._WaitUntilSync()
8368       self._GoStandalone()
8369       self._GoReconnect(False)
8370       self._WaitUntilSync()
8371
8372     # If the instance's disk template is `rbd' and there was a successful
8373     # migration, unmap the device from the source node.
8374     if self.instance.disk_template == constants.DT_RBD:
8375       disks = _ExpandCheckDisks(instance, instance.disks)
8376       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8377       for disk in disks:
8378         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8379         msg = result.fail_msg
8380         if msg:
8381           logging.error("Migration was successful, but couldn't unmap the"
8382                         " block device %s on source node %s: %s",
8383                         disk.iv_name, source_node, msg)
8384           logging.error("You need to unmap the device %s manually on %s",
8385                         disk.iv_name, source_node)
8386
8387     self.feedback_fn("* done")
8388
8389   def _ExecFailover(self):
8390     """Failover an instance.
8391
8392     The failover is done by shutting it down on its present node and
8393     starting it on the secondary.
8394
8395     """
8396     instance = self.instance
8397     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8398
8399     source_node = instance.primary_node
8400     target_node = self.target_node
8401
8402     if instance.admin_state == constants.ADMINST_UP:
8403       self.feedback_fn("* checking disk consistency between source and target")
8404       for (idx, dev) in enumerate(instance.disks):
8405         # for drbd, these are drbd over lvm
8406         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8407           if primary_node.offline:
8408             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8409                              " target node %s" %
8410                              (primary_node.name, idx, target_node))
8411           elif not self.ignore_consistency:
8412             raise errors.OpExecError("Disk %s is degraded on target node,"
8413                                      " aborting failover" % idx)
8414     else:
8415       self.feedback_fn("* not checking disk consistency as instance is not"
8416                        " running")
8417
8418     self.feedback_fn("* shutting down instance on source node")
8419     logging.info("Shutting down instance %s on node %s",
8420                  instance.name, source_node)
8421
8422     result = self.rpc.call_instance_shutdown(source_node, instance,
8423                                              self.shutdown_timeout)
8424     msg = result.fail_msg
8425     if msg:
8426       if self.ignore_consistency or primary_node.offline:
8427         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8428                            " proceeding anyway; please make sure node"
8429                            " %s is down; error details: %s",
8430                            instance.name, source_node, source_node, msg)
8431       else:
8432         raise errors.OpExecError("Could not shutdown instance %s on"
8433                                  " node %s: %s" %
8434                                  (instance.name, source_node, msg))
8435
8436     self.feedback_fn("* deactivating the instance's disks on source node")
8437     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8438       raise errors.OpExecError("Can't shut down the instance's disks")
8439
8440     instance.primary_node = target_node
8441     # distribute new instance config to the other nodes
8442     self.cfg.Update(instance, self.feedback_fn)
8443
8444     # Only start the instance if it's marked as up
8445     if instance.admin_state == constants.ADMINST_UP:
8446       self.feedback_fn("* activating the instance's disks on target node %s" %
8447                        target_node)
8448       logging.info("Starting instance %s on node %s",
8449                    instance.name, target_node)
8450
8451       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8452                                            ignore_secondaries=True)
8453       if not disks_ok:
8454         _ShutdownInstanceDisks(self.lu, instance)
8455         raise errors.OpExecError("Can't activate the instance's disks")
8456
8457       self.feedback_fn("* starting the instance on the target node %s" %
8458                        target_node)
8459       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8460                                             False)
8461       msg = result.fail_msg
8462       if msg:
8463         _ShutdownInstanceDisks(self.lu, instance)
8464         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8465                                  (instance.name, target_node, msg))
8466
8467   def Exec(self, feedback_fn):
8468     """Perform the migration.
8469
8470     """
8471     self.feedback_fn = feedback_fn
8472     self.source_node = self.instance.primary_node
8473
8474     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8475     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8476       self.target_node = self.instance.secondary_nodes[0]
8477       # Otherwise self.target_node has been populated either
8478       # directly, or through an iallocator.
8479
8480     self.all_nodes = [self.source_node, self.target_node]
8481     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8482                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8483
8484     if self.failover:
8485       feedback_fn("Failover instance %s" % self.instance.name)
8486       self._ExecFailover()
8487     else:
8488       feedback_fn("Migrating instance %s" % self.instance.name)
8489
8490       if self.cleanup:
8491         return self._ExecCleanup()
8492       else:
8493         return self._ExecMigration()
8494
8495
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a device and, recursively, its children on a given node.

  The children are always visited first. The device itself is only
  created when creation is forced, either by the caller or because the
  device reports (via C{CreateOnSecondary()}) that it has to be created on
  secondaries; once forced, creation stays forced for the whole subtree.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True as soon as a device whose C{CreateOnSecondary()}
      returns a true value is found
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  for child in (device.children or []):
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8536
8537
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a given node.

  Children of the device are not handled here, so they must already have
  been created. If the device has no physical ID yet, the payload of the
  creation call is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_msg = ("Can't create block device %s on node %s for instance %s" %
                 (device, node, instance.name))
  creation.Raise(failure_msg)

  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
8566
8567
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per requested extension.

  For every entry of C{exts}, in order, a new unique ID is requested from
  the configuration and the entry is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the same order as C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
8579
8580
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters;
    dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.
    Templates not handled below yield an empty list.
  @raise errors.ProgrammerError: if the disk template is not known

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  dt_params = disk_params[disk_template]
  ld_defaults = constants.DISK_LD_DEFAULTS

  if disk_template == constants.DT_DRBD8:
    # One entry for the DRBD device itself...
    drbd_custom = {
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      }
    # ...followed by the data LV and the metadata LV
    data_custom = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    meta_custom = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    return [
      objects.FillDict(ld_defaults[constants.LD_DRBD8], drbd_custom),
      objects.FillDict(ld_defaults[constants.LD_LV], data_custom),
      objects.FillDict(ld_defaults[constants.LD_LV], meta_custom),
      ]

  if disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    return [ld_defaults[constants.LD_FILE]]

  if disk_template == constants.DT_PLAIN:
    lv_custom = {
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      }
    return [objects.FillDict(ld_defaults[constants.LD_LV], lv_custom)]

  if disk_template == constants.DT_BLOCK:
    return [ld_defaults[constants.LD_BLOCKDEV]]

  if disk_template == constants.DT_RBD:
    rbd_custom = {
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      }
    return [objects.FillDict(ld_defaults[constants.LD_RBD], rbd_custom)]

  return []
8664
8665
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration; the
  resulting device has a data LV of the requested size and a metadata LV
  of C{DRBD_META_SIZE} as children.

  @param lu: the lu on whose behalf we execute
  @param vgnames: two volume group names, for the data and the metadata
      LV respectively
  @param names: two LV names, for the data and the metadata LV respectively
  @return: the newly built L{objects.Disk} of type drbd8

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)

  drbd_logical_id = (primary, secondary, port, p_minor, s_minor,
                     shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_logical_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params=drbd_params)
8689
8690
# Prefix put in front of the ".disk<N>" suffix when _GenerateDiskTemplate
# asks for unique volume names; templates without an entry here do not get
# any generated names
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type of the disk objects which _GenerateDiskTemplate
# creates for the disk templates that are neither diskless nor DRBD8
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8704
8705
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate the disks for
  @param instance_name: name of the instance, used when allocating DRBD
      minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; it must contain exactly
      one node for DRBD8 and must be empty for all templates handled by
      the generic branch
  @param disk_info: list of disk definitions (dictionaries indexed by the
      C{constants.IDISK_*} keys)
  @param file_storage_dir: directory used for the logical ID of file-based
      disks
  @param file_driver: driver used for the logical ID of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function called with a message for each disk
      generated by the generic (non-DRBD8) branch
  @param disk_params: disk parameters, passed to L{_ComputeLDParams}
  @param _req_file_storage: callable invoked for the file template
  @param _req_shr_file_storage: callable invoked for the shared file
      template
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: if the secondary nodes do not fit the
      template or the template is unknown

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one on the primary, one on the secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Each disk gets a "_data" and a "_meta" volume sharing one unique prefix
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # Select the function computing the logical ID; its arguments are the
    # position in disk_info, the final disk index and the disk definition
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    # Looked up only after the template has been validated above, so that an
    # unknown template is reported as ProgrammerError and not as KeyError
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))

  return disks
8799
8800
8801 def _GetInstanceInfoText(instance):
8802   """Compute that text that should be added to the disk's metadata.
8803
8804   """
8805   return "originstname+%s" % instance.name
8806
8807
8808 def _CalcEta(time_taken, written, total_size):
8809   """Calculates the ETA based on size written and total size.
8810
8811   @param time_taken: The time taken so far
8812   @param written: amount written so far
8813   @param total_size: The total size of data to be written
8814   @return: The remaining time in seconds
8815
8816   """
8817   avg_time = time_taken / float(written)
8818   return (total_size - written) * avg_time
8819
8820
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Disk synchronisation is paused on the primary node before wiping and
  resumed afterwards, also when wiping fails.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None; a failed pause or wipe RPC is reported through the
      C{Raise} method of the RPC result

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # The per-disk payload can only be inspected if the RPC itself succeeded
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; it must not truncate to zero though, as the loop below would
      # then make no progress (and the ETA computation would divide by zero)
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # Report the progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Only warn here: raising from the "finally" clause would hide an
    # exception raised while wiping
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
8888
8889
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first, on the node used as
  primary.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: None

  """
  disk_info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if not to_skip or idx not in to_skip:
      logging.info("Creating disk %s for instance '%s'", idx, instance.name)
      #HARDCODE
      for node in all_nodes:
        # the same flag is passed twice, true only for the primary node
        on_primary = (node == pnode)
        _CreateBlockDev(lu, node, instance, device, on_primary, disk_info,
                        on_primary)
8932
8933
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # The storage directory is derived from the first disk, hence there is
  # nothing to remove for an instance without disks
  if instance.disk_template == constants.DT_FILE and instance.disks:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
8986
8987
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template to compute the requirements for
  @param disks: list of disk definitions; each one must provide the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: the required space per volume group name (empty for templates
      which do not use volume groups)
  @raise errors.ProgrammerError: if the requirements of the disk template
      are not known

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums up, per volume group, the size of each disk plus C{payload}.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate per volume group name, so that several disks in the same
      # volume group add up instead of overwriting each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9018
9019
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required by a disk template.

  @param disk_template: the disk template to compute the requirement for
  @param disks: list of disk definitions providing the
      C{constants.IDISK_SIZE} key
  @return: the summed-up size for the plain and DRBD8 templates, C{None}
      for the diskless and file templates and zero for the remaining
      known ones
  @raise errors.ProgrammerError: if the requirement of the disk template
      is not known

  """
  plain_size = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_size = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  sizes = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  if disk_template in sizes:
    return sizes[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
9042
9043
9044 def _FilterVmNodes(lu, nodenames):
9045   """Filters out non-vm_capable nodes from a list.
9046
9047   @type lu: L{LogicalUnit}
9048   @param lu: the logical unit for which we check
9049   @type nodenames: list
9050   @param nodenames: the list of nodes on which we should check
9051   @rtype: list
9052   @return: the list of vm-capable nodes
9053
9054   """
9055   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9056   return [name for name in nodenames if name not in vm_nodes]
9057
9058
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  The given parameters are layered over the cluster-level ones of the
  hypervisor and then validated on all vm-capable nodes of the list;
  nodes reported as offline are skipped. It is meant to be shared between
  instance creation and instance modification.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  hvfull = objects.FillDict(cluster_defaults, hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    node_result = hvinfo[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9087
9088
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  The validation is run on the vm-capable nodes of the given list; an
  informational message is logged for each node returning an empty
  payload.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should validate
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  result = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if nres.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9117
9118
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  The instance is either created from scratch, imported from an export or
  imported from a remote cluster, depending on the mode of the opcode
  (see L{CheckArguments}).

  """
  # NOTE(review): presumably the hooks path and hooks type used when
  # running the hooks for this LU -- confirm against LogicalUnit
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): presumably means that the LU does not need the big
  # cluster lock and declares its locks in ExpandNames -- confirm
  REQ_BGL = False
9126
  def CheckArguments(self):
    """Check arguments.

    Normalizes the instance name and validates the NIC and disk
    definitions (including the disk adoption strategy), the file driver,
    the node/iallocator combination and the parameters specific to the
    creation mode.

    Besides adjusting some opcode fields, this sets C{self.adopt_disks},
    C{self.check_ip} and C{self._cds}; for remote imports also
    C{self.source_x509_ca_pem}, C{self.source_x509_ca} and
    C{self.source_instance_name}.

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is restricted to some templates and can be combined neither
      # with an iallocator nor with an import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      # internally mirrored templates need a secondary node, for all the
      # other templates a given secondary node is dropped
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # cluster domain secret, used below for the remote import checks
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # a result code other than None marks the certificate as invalid
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9281
9282   def ExpandNames(self):
9283     """ExpandNames for CreateInstance.
9284
9285     Figure out the right locks for instance creation.
9286
9287     """
9288     self.needed_locks = {}
9289
9290     instance_name = self.op.instance_name
9291     # this is just a preventive check, but someone might still add this
9292     # instance in the meantime, and creation will fail at lock-add time
9293     if instance_name in self.cfg.GetInstanceList():
9294       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9295                                  instance_name, errors.ECODE_EXISTS)
9296
9297     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9298
9299     if self.op.iallocator:
9300       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9301       # specifying a group on instance creation and then selecting nodes from
9302       # that group
9303       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9304       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9305     else:
9306       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9307       nodelist = [self.op.pnode]
9308       if self.op.snode is not None:
9309         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9310         nodelist.append(self.op.snode)
9311       self.needed_locks[locking.LEVEL_NODE] = nodelist
9312       # Lock resources of instance's primary and secondary nodes (copy to
9313       # prevent accidential modification)
9314       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9315
9316     # in case of import lock the source node too
9317     if self.op.mode == constants.INSTANCE_IMPORT:
9318       src_node = self.op.src_node
9319       src_path = self.op.src_path
9320
9321       if src_path is None:
9322         self.op.src_path = src_path = self.op.instance_name
9323
9324       if src_node is None:
9325         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9326         self.op.src_node = None
9327         if os.path.isabs(src_path):
9328           raise errors.OpPrereqError("Importing an instance from a path"
9329                                      " requires a source node option",
9330                                      errors.ECODE_INVAL)
9331       else:
9332         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9333         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9334           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9335         if not os.path.isabs(src_path):
9336           self.op.src_path = src_path = \
9337             utils.PathJoin(constants.EXPORT_DIR, src_path)
9338
9339   def _RunAllocator(self):
9340     """Run the allocator based on input opcode.
9341
9342     """
9343     nics = [n.ToDict() for n in self.nics]
9344     ial = IAllocator(self.cfg, self.rpc,
9345                      mode=constants.IALLOCATOR_MODE_ALLOC,
9346                      name=self.op.instance_name,
9347                      disk_template=self.op.disk_template,
9348                      tags=self.op.tags,
9349                      os=self.op.os_type,
9350                      vcpus=self.be_full[constants.BE_VCPUS],
9351                      memory=self.be_full[constants.BE_MAXMEM],
9352                      disks=self.disks,
9353                      nics=nics,
9354                      hypervisor=self.op.hypervisor,
9355                      )
9356
9357     ial.Run(self.op.iallocator)
9358
9359     if not ial.success:
9360       raise errors.OpPrereqError("Can't compute nodes using"
9361                                  " iallocator '%s': %s" %
9362                                  (self.op.iallocator, ial.info),
9363                                  errors.ECODE_NORES)
9364     if len(ial.result) != ial.required_nodes:
9365       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9366                                  " of nodes (%s), required %s" %
9367                                  (self.op.iallocator, len(ial.result),
9368                                   ial.required_nodes), errors.ECODE_FAULT)
9369     self.op.pnode = ial.result[0]
9370     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9371                  self.op.instance_name, self.op.iallocator,
9372                  utils.CommaJoin(ial.result))
9373     if ial.required_nodes == 2:
9374       self.op.snode = ial.result[1]
9375
9376   def BuildHooksEnv(self):
9377     """Build hooks env.
9378
9379     This runs on master, primary and secondary nodes of the instance.
9380
9381     """
9382     env = {
9383       "ADD_MODE": self.op.mode,
9384       }
9385     if self.op.mode == constants.INSTANCE_IMPORT:
9386       env["SRC_NODE"] = self.op.src_node
9387       env["SRC_PATH"] = self.op.src_path
9388       env["SRC_IMAGES"] = self.src_images
9389
9390     env.update(_BuildInstanceHookEnv(
9391       name=self.op.instance_name,
9392       primary_node=self.op.pnode,
9393       secondary_nodes=self.secondaries,
9394       status=self.op.start,
9395       os_type=self.op.os_type,
9396       minmem=self.be_full[constants.BE_MINMEM],
9397       maxmem=self.be_full[constants.BE_MAXMEM],
9398       vcpus=self.be_full[constants.BE_VCPUS],
9399       nics=_NICListToTuple(self, self.nics),
9400       disk_template=self.op.disk_template,
9401       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9402              for d in self.disks],
9403       bep=self.be_full,
9404       hvp=self.hv_full,
9405       hypervisor_name=self.op.hypervisor,
9406       tags=self.op.tags,
9407     ))
9408
9409     return env
9410
9411   def BuildHooksNodes(self):
9412     """Build hooks nodes.
9413
9414     """
9415     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9416     return nl, nl
9417
9418   def _ReadExportInfo(self):
9419     """Reads the export information from disk.
9420
9421     It will override the opcode source node and path with the actual
9422     information, if these two were not specified before.
9423
9424     @return: the export information
9425
9426     """
9427     assert self.op.mode == constants.INSTANCE_IMPORT
9428
9429     src_node = self.op.src_node
9430     src_path = self.op.src_path
9431
9432     if src_node is None:
9433       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9434       exp_list = self.rpc.call_export_list(locked_nodes)
9435       found = False
9436       for node in exp_list:
9437         if exp_list[node].fail_msg:
9438           continue
9439         if src_path in exp_list[node].payload:
9440           found = True
9441           self.op.src_node = src_node = node
9442           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9443                                                        src_path)
9444           break
9445       if not found:
9446         raise errors.OpPrereqError("No export found for relative path %s" %
9447                                     src_path, errors.ECODE_INVAL)
9448
9449     _CheckNodeOnline(self, src_node)
9450     result = self.rpc.call_export_info(src_node, src_path)
9451     result.Raise("No export or invalid export found in dir %s" % src_path)
9452
9453     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9454     if not export_info.has_section(constants.INISECT_EXP):
9455       raise errors.ProgrammerError("Corrupted export config",
9456                                    errors.ECODE_ENVIRON)
9457
9458     ei_version = export_info.get(constants.INISECT_EXP, "version")
9459     if (int(ei_version) != constants.EXPORT_VERSION):
9460       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9461                                  (ei_version, constants.EXPORT_VERSION),
9462                                  errors.ECODE_ENVIRON)
9463     return export_info
9464
9465   def _ReadExportParams(self, einfo):
9466     """Use export parameters as defaults.
9467
9468     In case the opcode doesn't specify (as in override) some instance
9469     parameters, then try to use them from the export information, if
9470     that declares them.
9471
9472     """
9473     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9474
9475     if self.op.disk_template is None:
9476       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9477         self.op.disk_template = einfo.get(constants.INISECT_INS,
9478                                           "disk_template")
9479         if self.op.disk_template not in constants.DISK_TEMPLATES:
9480           raise errors.OpPrereqError("Disk template specified in configuration"
9481                                      " file is not one of the allowed values:"
9482                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9483       else:
9484         raise errors.OpPrereqError("No disk template specified and the export"
9485                                    " is missing the disk_template information",
9486                                    errors.ECODE_INVAL)
9487
9488     if not self.op.disks:
9489       disks = []
9490       # TODO: import the disk iv_name too
9491       for idx in range(constants.MAX_DISKS):
9492         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9493           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9494           disks.append({constants.IDISK_SIZE: disk_sz})
9495       self.op.disks = disks
9496       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9497         raise errors.OpPrereqError("No disk info specified and the export"
9498                                    " is missing the disk information",
9499                                    errors.ECODE_INVAL)
9500
9501     if not self.op.nics:
9502       nics = []
9503       for idx in range(constants.MAX_NICS):
9504         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9505           ndict = {}
9506           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9507             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9508             ndict[name] = v
9509           nics.append(ndict)
9510         else:
9511           break
9512       self.op.nics = nics
9513
9514     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9515       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9516
9517     if (self.op.hypervisor is None and
9518         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9519       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9520
9521     if einfo.has_section(constants.INISECT_HYP):
9522       # use the export parameters but do not override the ones
9523       # specified by the user
9524       for name, value in einfo.items(constants.INISECT_HYP):
9525         if name not in self.op.hvparams:
9526           self.op.hvparams[name] = value
9527
9528     if einfo.has_section(constants.INISECT_BEP):
9529       # use the parameters, without overriding
9530       for name, value in einfo.items(constants.INISECT_BEP):
9531         if name not in self.op.beparams:
9532           self.op.beparams[name] = value
9533         # Compatibility for the old "memory" be param
9534         if name == constants.BE_MEMORY:
9535           if constants.BE_MAXMEM not in self.op.beparams:
9536             self.op.beparams[constants.BE_MAXMEM] = value
9537           if constants.BE_MINMEM not in self.op.beparams:
9538             self.op.beparams[constants.BE_MINMEM] = value
9539     else:
9540       # try to read the parameters old style, from the main section
9541       for name in constants.BES_PARAMETERS:
9542         if (name not in self.op.beparams and
9543             einfo.has_option(constants.INISECT_INS, name)):
9544           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9545
9546     if einfo.has_section(constants.INISECT_OSP):
9547       # use the parameters, without overriding
9548       for name, value in einfo.items(constants.INISECT_OSP):
9549         if name not in self.op.osparams:
9550           self.op.osparams[name] = value
9551
9552   def _RevertToDefaults(self, cluster):
9553     """Revert the instance parameters to the default values.
9554
9555     """
9556     # hvparams
9557     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9558     for name in self.op.hvparams.keys():
9559       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9560         del self.op.hvparams[name]
9561     # beparams
9562     be_defs = cluster.SimpleFillBE({})
9563     for name in self.op.beparams.keys():
9564       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9565         del self.op.beparams[name]
9566     # nic params
9567     nic_defs = cluster.SimpleFillNIC({})
9568     for nic in self.op.nics:
9569       for name in constants.NICS_PARAMETERS:
9570         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9571           del nic[name]
9572     # osparams
9573     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9574     for name in self.op.osparams.keys():
9575       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9576         del self.op.osparams[name]
9577
9578   def _CalculateFileStorageDir(self):
9579     """Calculate final instance file storage dir.
9580
9581     """
9582     # file storage dir calculation/check
9583     self.instance_file_storage_dir = None
9584     if self.op.disk_template in constants.DTS_FILEBASED:
9585       # build the full file storage dir path
9586       joinargs = []
9587
9588       if self.op.disk_template == constants.DT_SHARED_FILE:
9589         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9590       else:
9591         get_fsd_fn = self.cfg.GetFileStorageDir
9592
9593       cfg_storagedir = get_fsd_fn()
9594       if not cfg_storagedir:
9595         raise errors.OpPrereqError("Cluster file storage dir not defined")
9596       joinargs.append(cfg_storagedir)
9597
9598       if self.op.file_storage_dir is not None:
9599         joinargs.append(self.op.file_storage_dir)
9600
9601       joinargs.append(self.op.instance_name)
9602
9603       # pylint: disable=W0142
9604       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9605
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    This validates the opcode parameters against the cluster
    configuration (hypervisor, backend, OS and NIC parameters, disks),
    runs the instance allocator if one was requested, releases the node
    locks which are not needed anymore and finally verifies the selected
    nodes (state, instance policy, disk space or adoption data,
    hypervisor and OS parameters, bridges, free memory). For imports,
    the export information is read first and used to fill in the
    parameters missing from the opcode.

    Among others, this sets C{self.hv_full}, C{self.be_full},
    C{self.os_full}, C{self.nics}, C{self.disks}, C{self.pnode},
    C{self.secondaries}, C{self.diskparams} and C{self.dry_run_result};
    for imports also C{self.src_images}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    # templates not listed in DTS_NOT_LVM need a cluster volume group
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # fall back to the hypervisor type returned by the configuration
    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    # only values of existing keys are replaced here, so the dictionary
    # does not change size while being iterated
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        # explicitly given addresses are reserved right away; a failed
        # reservation is reported as a duplicate address
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      # only the explicitly requested values are stored in the NIC; the
      # filled version is used just for the syntax check
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      # the metadata VG and the adoption source are copied only if given
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # one entry per disk: the path of its dump, or False if the export
      # has no dump for it, so that list positions match disk indices
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      # when importing under the exported instance's name, NICs still
      # set to 'auto' get the MAC address recorded in the export
      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    # (filter(None, ...) drops the node names which are not set)
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      }

    # the policy of the primary node's group is the one being enforced;
    # violations are fatal unless the opcode asks to ignore the policy
    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                  errors.ECODE_INVAL)

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = group_info.diskparams

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      # the set collapses duplicates, hence the length comparison below
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # NOTE(review): element 2 of the per-LV data is used as the "online"
      # flag and element 0 (below) as the size - confirm against the
      # lv_list RPC
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # as for adopted LVs, the size reported by the node replaces the
      # one given in the opcode
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    # the names of the primary node and of the secondaries, if any
    self.dry_run_result = list(nodenames)
9966
9967   def Exec(self, feedback_fn):
9968     """Create and add the instance to the cluster.
9969
9970     """
9971     instance = self.op.instance_name
9972     pnode_name = self.pnode.name
9973
9974     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9975                 self.owned_locks(locking.LEVEL_NODE)), \
9976       "Node locks differ from node resource locks"
9977
9978     ht_kind = self.op.hypervisor
9979     if ht_kind in constants.HTS_REQ_PORT:
9980       network_port = self.cfg.AllocatePort()
9981     else:
9982       network_port = None
9983
9984     disks = _GenerateDiskTemplate(self,
9985                                   self.op.disk_template,
9986                                   instance, pnode_name,
9987                                   self.secondaries,
9988                                   self.disks,
9989                                   self.instance_file_storage_dir,
9990                                   self.op.file_driver,
9991                                   0,
9992                                   feedback_fn,
9993                                   self.diskparams)
9994
9995     iobj = objects.Instance(name=instance, os=self.op.os_type,
9996                             primary_node=pnode_name,
9997                             nics=self.nics, disks=disks,
9998                             disk_template=self.op.disk_template,
9999                             admin_state=constants.ADMINST_DOWN,
10000                             network_port=network_port,
10001                             beparams=self.op.beparams,
10002                             hvparams=self.op.hvparams,
10003                             hypervisor=self.op.hypervisor,
10004                             osparams=self.op.osparams,
10005                             )
10006
10007     if self.op.tags:
10008       for tag in self.op.tags:
10009         iobj.AddTag(tag)
10010
10011     if self.adopt_disks:
10012       if self.op.disk_template == constants.DT_PLAIN:
10013         # rename LVs to the newly-generated names; we need to construct
10014         # 'fake' LV disks with the old data, plus the new unique_id
10015         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10016         rename_to = []
10017         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10018           rename_to.append(t_dsk.logical_id)
10019           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10020           self.cfg.SetDiskID(t_dsk, pnode_name)
10021         result = self.rpc.call_blockdev_rename(pnode_name,
10022                                                zip(tmp_disks, rename_to))
10023         result.Raise("Failed to rename adoped LVs")
10024     else:
10025       feedback_fn("* creating instance disks...")
10026       try:
10027         _CreateDisks(self, iobj)
10028       except errors.OpExecError:
10029         self.LogWarning("Device creation failed, reverting...")
10030         try:
10031           _RemoveDisks(self, iobj)
10032         finally:
10033           self.cfg.ReleaseDRBDMinors(instance)
10034           raise
10035
10036     feedback_fn("adding instance %s to cluster config" % instance)
10037
10038     self.cfg.AddInstance(iobj, self.proc.GetECId())
10039
10040     # Declare that we don't want to remove the instance lock anymore, as we've
10041     # added the instance to the config
10042     del self.remove_locks[locking.LEVEL_INSTANCE]
10043
10044     if self.op.mode == constants.INSTANCE_IMPORT:
10045       # Release unused nodes
10046       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10047     else:
10048       # Release all nodes
10049       _ReleaseLocks(self, locking.LEVEL_NODE)
10050
10051     disk_abort = False
10052     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10053       feedback_fn("* wiping instance disks...")
10054       try:
10055         _WipeDisks(self, iobj)
10056       except errors.OpExecError, err:
10057         logging.exception("Wiping disks failed")
10058         self.LogWarning("Wiping instance disks failed (%s)", err)
10059         disk_abort = True
10060
10061     if disk_abort:
10062       # Something is already wrong with the disks, don't do anything else
10063       pass
10064     elif self.op.wait_for_sync:
10065       disk_abort = not _WaitForSync(self, iobj)
10066     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10067       # make sure the disks are not degraded (still sync-ing is ok)
10068       feedback_fn("* checking mirrors status")
10069       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10070     else:
10071       disk_abort = False
10072
10073     if disk_abort:
10074       _RemoveDisks(self, iobj)
10075       self.cfg.RemoveInstance(iobj.name)
10076       # Make sure the instance lock gets removed
10077       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10078       raise errors.OpExecError("There are some degraded disks for"
10079                                " this instance")
10080
10081     # Release all node resource locks
10082     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10083
10084     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10085       if self.op.mode == constants.INSTANCE_CREATE:
10086         if not self.op.no_install:
10087           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10088                         not self.op.wait_for_sync)
10089           if pause_sync:
10090             feedback_fn("* pausing disk sync to install instance OS")
10091             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10092                                                               iobj.disks, True)
10093             for idx, success in enumerate(result.payload):
10094               if not success:
10095                 logging.warn("pause-sync of instance %s for disk %d failed",
10096                              instance, idx)
10097
10098           feedback_fn("* running the instance OS create scripts...")
10099           # FIXME: pass debug option from opcode to backend
10100           os_add_result = \
10101             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10102                                           self.op.debug_level)
10103           if pause_sync:
10104             feedback_fn("* resuming disk sync")
10105             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10106                                                               iobj.disks, False)
10107             for idx, success in enumerate(result.payload):
10108               if not success:
10109                 logging.warn("resume-sync of instance %s for disk %d failed",
10110                              instance, idx)
10111
10112           os_add_result.Raise("Could not add os for instance %s"
10113                               " on node %s" % (instance, pnode_name))
10114
10115       elif self.op.mode == constants.INSTANCE_IMPORT:
10116         feedback_fn("* running the instance OS import scripts...")
10117
10118         transfers = []
10119
10120         for idx, image in enumerate(self.src_images):
10121           if not image:
10122             continue
10123
10124           # FIXME: pass debug option from opcode to backend
10125           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10126                                              constants.IEIO_FILE, (image, ),
10127                                              constants.IEIO_SCRIPT,
10128                                              (iobj.disks[idx], idx),
10129                                              None)
10130           transfers.append(dt)
10131
10132         import_result = \
10133           masterd.instance.TransferInstanceData(self, feedback_fn,
10134                                                 self.op.src_node, pnode_name,
10135                                                 self.pnode.secondary_ip,
10136                                                 iobj, transfers)
10137         if not compat.all(import_result):
10138           self.LogWarning("Some disks for instance %s on node %s were not"
10139                           " imported successfully" % (instance, pnode_name))
10140
10141       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10142         feedback_fn("* preparing remote import...")
10143         # The source cluster will stop the instance before attempting to make a
10144         # connection. In some cases stopping an instance can take a long time,
10145         # hence the shutdown timeout is added to the connection timeout.
10146         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10147                            self.op.source_shutdown_timeout)
10148         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10149
10150         assert iobj.primary_node == self.pnode.name
10151         disk_results = \
10152           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10153                                         self.source_x509_ca,
10154                                         self._cds, timeouts)
10155         if not compat.all(disk_results):
10156           # TODO: Should the instance still be started, even if some disks
10157           # failed to import (valid for local imports, too)?
10158           self.LogWarning("Some disks for instance %s on node %s were not"
10159                           " imported successfully" % (instance, pnode_name))
10160
10161         # Run rename script on newly imported instance
10162         assert iobj.name == instance
10163         feedback_fn("Running rename script for %s" % instance)
10164         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10165                                                    self.source_instance_name,
10166                                                    self.op.debug_level)
10167         if result.fail_msg:
10168           self.LogWarning("Failed to run rename script for %s on node"
10169                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10170
10171       else:
10172         # also checked in the prereq part
10173         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10174                                      % self.op.mode)
10175
10176     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10177
10178     if self.op.start:
10179       iobj.admin_state = constants.ADMINST_UP
10180       self.cfg.Update(iobj, feedback_fn)
10181       logging.info("Starting instance %s on node %s", instance, pnode_name)
10182       feedback_fn("* starting instance...")
10183       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10184                                             False)
10185       result.Raise("Could not start instance")
10186
10187     return list(iobj.all_nodes)
10188
10189
10190 def _CheckRADOSFreeSpace():
10191   """Compute disk size requirements inside the RADOS cluster.
10192
10193   """
10194   # For the RADOS cluster we assume there is always enough space.
10195   pass
10196
10197
class LUInstanceConsole(NoHooksLU):
  """Provide the data needed to connect to an instance's console.

  This LU is somewhat special: it does not attach to the console
  itself. Its result is the console description built by
  L{_GetInstanceConsole}, i.e. the information needed (on the master
  node) in order to connect to the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the lock sharing and lock the instance.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the (already locked) instance from the configuration into
    C{self.instance} and checks that its primary node is online.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Build the console information of the instance.

    @param feedback_fn: feedback function (not used by this LU)
    @return: the result of L{_GetInstanceConsole} for the instance
    @raise errors.OpExecError: if the instance is not among the
      instances reported by its primary node

    """
    instance = self.instance
    pnode = instance.primary_node

    # Ask the primary node which instances it has for this hypervisor
    list_result = self.rpc.call_instance_list([pnode],
                                              [instance.hypervisor])[pnode]
    list_result.Raise("Can't get node information from %s" % pnode)

    if instance.name in list_result.payload:
      logging.debug("Connecting to console of %s on %s", instance.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

    # The instance is not reported by the node; derive the state shown in
    # the error message from the configured admin state
    admin_state = instance.admin_state
    if admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (instance.name, state))
10247
10248
def _GetInstanceConsole(cluster, instance):
  """Build the console description of an instance as a dictionary.

  The hypervisor and backend parameters are filled from the cluster
  object and handed to the hypervisor separately, so that the instance
  object itself is not edited (and the defaults do not end up saved in
  the instance).

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill the instance parameters
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is requested
  @rtype: dict
  @return: the console object, converted via its C{ToDict} method

  """
  hv_obj = hypervisor.GetHypervisor(instance.hypervisor)

  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  console_obj = hv_obj.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert console_obj.instance == instance.name
  assert console_obj.Validate()

  return console_obj.ToDict()
10268
10269
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is delegated to a L{TLReplaceDisks} tasklet;
  this class checks the arguments, declares the needed locks and builds
  the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the mode/remote node/iallocator combination of the opcode.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Expand names, declare the initial locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    # None of these levels may have been requested up to this point
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                  locking.LEVEL_NODEGROUP):
      assert level not in self.needed_locks

    op = self.op

    assert op.iallocator is None or op.remote_node is None, \
      "Conflicting options"

    if op.remote_node is None:
      # Node locks are computed later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release,
                                   op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the replace mode, the new secondary (as given
    in the opcode) and the current secondary, plus the generic instance
    environment.

    """
    instance = self.replacer.instance

    hooks_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, instance))

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list holds the master node, the instance's primary node and, if
    one was given in the opcode, the new secondary node; the same list
    is returned for both elements of the result.

    """
    instance = self.replacer.instance

    node_list = [self.cfg.GetMasterNode(), instance.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      node_list.append(new_secondary)

    return node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically acquired node group locks (if any)
    still match the instance, then runs the generic prerequisite check.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
10387
10388
10389 class TLReplaceDisks(Tasklet):
10390   """Replaces disks for an instance.
10391
10392   Note: Locking is not within the scope of this class.
10393
10394   """
10395   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10396                disks, delay_iallocator, early_release, ignore_ipolicy):
10397     """Initializes this class.
10398
10399     """
10400     Tasklet.__init__(self, lu)
10401
10402     # Parameters
10403     self.instance_name = instance_name
10404     self.mode = mode
10405     self.iallocator_name = iallocator_name
10406     self.remote_node = remote_node
10407     self.disks = disks
10408     self.delay_iallocator = delay_iallocator
10409     self.early_release = early_release
10410     self.ignore_ipolicy = ignore_ipolicy
10411
10412     # Runtime data
10413     self.instance = None
10414     self.new_node = None
10415     self.target_node = None
10416     self.other_node = None
10417     self.remote_node_info = None
10418     self.node_secondary_ip = None
10419
10420   @staticmethod
10421   def CheckArguments(mode, remote_node, iallocator):
10422     """Helper function for users of this class.
10423
10424     """
10425     # check for valid parameter combination
10426     if mode == constants.REPLACE_DISK_CHG:
10427       if remote_node is None and iallocator is None:
10428         raise errors.OpPrereqError("When changing the secondary either an"
10429                                    " iallocator script must be used or the"
10430                                    " new node given", errors.ECODE_INVAL)
10431
10432       if remote_node is not None and iallocator is not None:
10433         raise errors.OpPrereqError("Give either the iallocator or the new"
10434                                    " secondary, not both", errors.ECODE_INVAL)
10435
10436     elif remote_node is not None or iallocator is not None:
10437       # Not replacing the secondary
10438       raise errors.OpPrereqError("The iallocator and new node options can"
10439                                  " only be used when changing the"
10440                                  " secondary node", errors.ECODE_INVAL)
10441
10442   @staticmethod
10443   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10444     """Compute a new secondary node using an IAllocator.
10445
10446     """
10447     ial = IAllocator(lu.cfg, lu.rpc,
10448                      mode=constants.IALLOCATOR_MODE_RELOC,
10449                      name=instance_name,
10450                      relocate_from=list(relocate_from))
10451
10452     ial.Run(iallocator_name)
10453
10454     if not ial.success:
10455       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10456                                  " %s" % (iallocator_name, ial.info),
10457                                  errors.ECODE_NORES)
10458
10459     if len(ial.result) != ial.required_nodes:
10460       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10461                                  " of nodes (%s), required %s" %
10462                                  (iallocator_name,
10463                                   len(ial.result), ial.required_nodes),
10464                                  errors.ECODE_FAULT)
10465
10466     remote_node_name = ial.result[0]
10467
10468     lu.LogInfo("Selected new secondary for instance '%s': %s",
10469                instance_name, remote_node_name)
10470
10471     return remote_node_name
10472
10473   def _FindFaultyDisks(self, node_name):
10474     """Wrapper for L{_FindFaultyInstanceDisks}.
10475
10476     """
10477     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10478                                     node_name, True)
10479
10480   def _CheckDisksActivated(self, instance):
10481     """Checks if the instance disks are activated.
10482
10483     @param instance: The instance to check disks
10484     @return: True if they are activated, False otherwise
10485
10486     """
10487     nodes = instance.all_nodes
10488
10489     for idx, dev in enumerate(instance.disks):
10490       for node in nodes:
10491         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10492         self.cfg.SetDiskID(dev, node)
10493
10494         result = self.rpc.call_blockdev_find(node, dev)
10495
10496         if result.offline:
10497           continue
10498         elif result.fail_msg or not result.payload:
10499           return False
10500
10501     return True
10502
10503   def CheckPrereq(self):
10504     """Check prerequisites.
10505
10506     This checks that the instance is in the cluster.
10507
10508     """
10509     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10510     assert instance is not None, \
10511       "Cannot retrieve locked instance %s" % self.instance_name
10512
10513     if instance.disk_template != constants.DT_DRBD8:
10514       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10515                                  " instances", errors.ECODE_INVAL)
10516
10517     if len(instance.secondary_nodes) != 1:
10518       raise errors.OpPrereqError("The instance has a strange layout,"
10519                                  " expected one secondary but found %d" %
10520                                  len(instance.secondary_nodes),
10521                                  errors.ECODE_FAULT)
10522
10523     if not self.delay_iallocator:
10524       self._CheckPrereq2()
10525
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    In order, this method:

      - determines the new secondary node (given explicitly or computed
        via the iallocator) and rejects the current primary/secondary
      - depending on the mode, fills in C{self.target_node},
        C{self.other_node}, C{self.new_node} and C{self.disks}
      - checks the instance policy of the new node's group, if a new
        node is involved
      - stores the disk parameters of the primary node's group in
        C{self.diskparams}
      - checks that the involved nodes are online
      - releases the node, node resource and node group locks which are
        not needed anymore
      - checks the disk indices and stores the secondary IP addresses of
        the involved nodes in C{self.node_secondary_ip}

    @raise errors.OpPrereqError: if the new node, the given disks or the
      state of the instance disks do not allow the operation
    @raise errors.ProgrammerError: if the replace mode is not handled

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or chosen by the
    # iallocator (relocating away from the current secondary)
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new node must differ from both current nodes of the instance
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # In these two modes the list of disks is computed below, hence it
    # must not be given by the caller
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: replace the faulty disks, which must all be on
      # the same node
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # No faulty disks found; Exec returns early on an empty disk list
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target_node) is not in the list of nodes
        # checked for being online below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
      # NOTE(review): the tasklet itself is passed as first argument here,
      # while the other helpers called in this method receive self.lu;
      # confirm that _CheckTargetNodeIPolicy only uses attributes which the
      # tasklet provides.
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    # TODO: compute disk parameters
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
    if primary_node_info.group != secondary_node_info.group:
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
                      " different node groups; the disk parameters of the"
                      " primary node's group will be applied.")

    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Only the nodes set above are involved in the operation; attributes
    # still at None are left out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
10676
10677   def Exec(self, feedback_fn):
10678     """Execute disk replacement.
10679
10680     This dispatches the disk replacement to the appropriate handler.
10681
10682     """
10683     if self.delay_iallocator:
10684       self._CheckPrereq2()
10685
10686     if __debug__:
10687       # Verify owned locks before starting operation
10688       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10689       assert set(owned_nodes) == set(self.node_secondary_ip), \
10690           ("Incorrect node locks, owning %s, expected %s" %
10691            (owned_nodes, self.node_secondary_ip.keys()))
10692       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10693               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10694
10695       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10696       assert list(owned_instances) == [self.instance_name], \
10697           "Instance '%s' not locked" % self.instance_name
10698
10699       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10700           "Should not own any node group lock at this point"
10701
10702     if not self.disks:
10703       feedback_fn("No disks need replacement")
10704       return
10705
10706     feedback_fn("Replacing disk(s) %s for %s" %
10707                 (utils.CommaJoin(self.disks), self.instance.name))
10708
10709     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10710
10711     # Activate the instance disks if we're replacing them on a down instance
10712     if activate_disks:
10713       _StartInstanceDisks(self.lu, self.instance, True)
10714
10715     try:
10716       # Should we replace the secondary node?
10717       if self.new_node is not None:
10718         fn = self._ExecDrbd8Secondary
10719       else:
10720         fn = self._ExecDrbd8DiskOnly
10721
10722       result = fn(feedback_fn)
10723     finally:
10724       # Deactivate the instance disks if we're replacing them on a
10725       # down instance
10726       if activate_disks:
10727         _SafeShutdownInstanceDisks(self.lu, self.instance)
10728
10729     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10730
10731     if __debug__:
10732       # Verify owned locks
10733       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10734       nodes = frozenset(self.node_secondary_ip)
10735       assert ((self.early_release and not owned_nodes) or
10736               (not self.early_release and not (set(owned_nodes) - nodes))), \
10737         ("Not owning the correct locks, early_release=%s, owned=%r,"
10738          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10739
10740     return result
10741
10742   def _CheckVolumeGroup(self, nodes):
10743     self.lu.LogInfo("Checking volume groups")
10744
10745     vgname = self.cfg.GetVGName()
10746
10747     # Make sure volume group exists on all involved nodes
10748     results = self.rpc.call_vg_list(nodes)
10749     if not results:
10750       raise errors.OpExecError("Can't list volume groups on the nodes")
10751
10752     for node in nodes:
10753       res = results[node]
10754       res.Raise("Error checking node %s" % node)
10755       if vgname not in res.payload:
10756         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10757                                  (vgname, node))
10758
10759   def _CheckDisksExistence(self, nodes):
10760     # Check disk existence
10761     for idx, dev in enumerate(self.instance.disks):
10762       if idx not in self.disks:
10763         continue
10764
10765       for node in nodes:
10766         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10767         self.cfg.SetDiskID(dev, node)
10768
10769         result = self.rpc.call_blockdev_find(node, dev)
10770
10771         msg = result.fail_msg
10772         if msg or not result.payload:
10773           if not msg:
10774             msg = "disk not found"
10775           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10776                                    (idx, node, msg))
10777
10778   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10779     for idx, dev in enumerate(self.instance.disks):
10780       if idx not in self.disks:
10781         continue
10782
10783       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10784                       (idx, node_name))
10785
10786       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10787                                    ldisk=ldisk):
10788         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10789                                  " replace disks for instance %s" %
10790                                  (node_name, self.instance.name))
10791
10792   def _CreateNewStorage(self, node_name):
10793     """Create new storage on the primary or secondary node.
10794
10795     This is only used for same-node replaces, not for changing the
10796     secondary node, hence we don't want to modify the existing disk.
10797
10798     """
10799     iv_names = {}
10800
10801     for idx, dev in enumerate(self.instance.disks):
10802       if idx not in self.disks:
10803         continue
10804
10805       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10806
10807       self.cfg.SetDiskID(dev, node_name)
10808
10809       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10810       names = _GenerateUniqueNames(self.lu, lv_names)
10811
10812       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10813
10814       vg_data = dev.children[0].logical_id[0]
10815       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10816                              logical_id=(vg_data, names[0]), params=data_p)
10817       vg_meta = dev.children[1].logical_id[0]
10818       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10819                              logical_id=(vg_meta, names[1]), params=meta_p)
10820
10821       new_lvs = [lv_data, lv_meta]
10822       old_lvs = [child.Copy() for child in dev.children]
10823       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10824
10825       # we pass force_create=True to force the LVM creation
10826       for new_lv in new_lvs:
10827         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10828                         _GetInstanceInfoText(self.instance), False)
10829
10830     return iv_names
10831
10832   def _CheckDevices(self, node_name, iv_names):
10833     for name, (dev, _, _) in iv_names.iteritems():
10834       self.cfg.SetDiskID(dev, node_name)
10835
10836       result = self.rpc.call_blockdev_find(node_name, dev)
10837
10838       msg = result.fail_msg
10839       if msg or not result.payload:
10840         if not msg:
10841           msg = "disk not found"
10842         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10843                                  (name, msg))
10844
10845       if result.payload.is_degraded:
10846         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10847
10848   def _RemoveOldStorage(self, node_name, iv_names):
10849     for name, (_, old_lvs, _) in iv_names.iteritems():
10850       self.lu.LogInfo("Remove logical volumes for %s" % name)
10851
10852       for lv in old_lvs:
10853         self.cfg.SetDiskID(lv, node_name)
10854
10855         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10856         if msg:
10857           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10858                              hint="remove unused LVs manually")
10859
10860   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10861     """Replace a disk on the primary or secondary for DRBD 8.
10862
10863     The algorithm for replace is quite complicated:
10864
10865       1. for each disk to be replaced:
10866
10867         1. create new LVs on the target node with unique names
10868         1. detach old LVs from the drbd device
10869         1. rename old LVs to name_replaced.<time_t>
10870         1. rename new LVs to old LVs
10871         1. attach the new LVs (with the old names now) to the drbd device
10872
10873       1. wait for sync across all devices
10874
10875       1. for each modified disk:
10876
10877         1. remove old LVs (which have the name name_replaces.<time_t>)
10878
10879     Failures are not very well handled.
10880
10881     """
10882     steps_total = 6
10883
10884     # Step: check device activation
10885     self.lu.LogStep(1, steps_total, "Check device existence")
10886     self._CheckDisksExistence([self.other_node, self.target_node])
10887     self._CheckVolumeGroup([self.target_node, self.other_node])
10888
10889     # Step: check other node consistency
10890     self.lu.LogStep(2, steps_total, "Check peer consistency")
10891     self._CheckDisksConsistency(self.other_node,
10892                                 self.other_node == self.instance.primary_node,
10893                                 False)
10894
10895     # Step: create new storage
10896     self.lu.LogStep(3, steps_total, "Allocate new storage")
10897     iv_names = self._CreateNewStorage(self.target_node)
10898
10899     # Step: for each lv, detach+rename*2+attach
10900     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10901     for dev, old_lvs, new_lvs in iv_names.itervalues():
10902       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10903
10904       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10905                                                      old_lvs)
10906       result.Raise("Can't detach drbd from local storage on node"
10907                    " %s for device %s" % (self.target_node, dev.iv_name))
10908       #dev.children = []
10909       #cfg.Update(instance)
10910
10911       # ok, we created the new LVs, so now we know we have the needed
10912       # storage; as such, we proceed on the target node to rename
10913       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10914       # using the assumption that logical_id == physical_id (which in
10915       # turn is the unique_id on that node)
10916
10917       # FIXME(iustin): use a better name for the replaced LVs
10918       temp_suffix = int(time.time())
10919       ren_fn = lambda d, suff: (d.physical_id[0],
10920                                 d.physical_id[1] + "_replaced-%s" % suff)
10921
10922       # Build the rename list based on what LVs exist on the node
10923       rename_old_to_new = []
10924       for to_ren in old_lvs:
10925         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10926         if not result.fail_msg and result.payload:
10927           # device exists
10928           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10929
10930       self.lu.LogInfo("Renaming the old LVs on the target node")
10931       result = self.rpc.call_blockdev_rename(self.target_node,
10932                                              rename_old_to_new)
10933       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10934
10935       # Now we rename the new LVs to the old LVs
10936       self.lu.LogInfo("Renaming the new LVs on the target node")
10937       rename_new_to_old = [(new, old.physical_id)
10938                            for old, new in zip(old_lvs, new_lvs)]
10939       result = self.rpc.call_blockdev_rename(self.target_node,
10940                                              rename_new_to_old)
10941       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10942
10943       # Intermediate steps of in memory modifications
10944       for old, new in zip(old_lvs, new_lvs):
10945         new.logical_id = old.logical_id
10946         self.cfg.SetDiskID(new, self.target_node)
10947
10948       # We need to modify old_lvs so that removal later removes the
10949       # right LVs, not the newly added ones; note that old_lvs is a
10950       # copy here
10951       for disk in old_lvs:
10952         disk.logical_id = ren_fn(disk, temp_suffix)
10953         self.cfg.SetDiskID(disk, self.target_node)
10954
10955       # Now that the new lvs have the old name, we can add them to the device
10956       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10957       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10958                                                   new_lvs)
10959       msg = result.fail_msg
10960       if msg:
10961         for new_lv in new_lvs:
10962           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10963                                                new_lv).fail_msg
10964           if msg2:
10965             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10966                                hint=("cleanup manually the unused logical"
10967                                      "volumes"))
10968         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10969
10970     cstep = itertools.count(5)
10971
10972     if self.early_release:
10973       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10974       self._RemoveOldStorage(self.target_node, iv_names)
10975       # TODO: Check if releasing locks early still makes sense
10976       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10977     else:
10978       # Release all resource locks except those used by the instance
10979       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10980                     keep=self.node_secondary_ip.keys())
10981
10982     # Release all node locks while waiting for sync
10983     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10984
10985     # TODO: Can the instance lock be downgraded here? Take the optional disk
10986     # shutdown in the caller into consideration.
10987
10988     # Wait for sync
10989     # This can fail as the old devices are degraded and _WaitForSync
10990     # does a combined result over all disks, so we don't check its return value
10991     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10992     _WaitForSync(self.lu, self.instance)
10993
10994     # Check all devices manually
10995     self._CheckDevices(self.instance.primary_node, iv_names)
10996
10997     # Step: remove old storage
10998     if not self.early_release:
10999       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11000       self._RemoveOldStorage(self.target_node, iv_names)
11001
11002   def _ExecDrbd8Secondary(self, feedback_fn):
11003     """Replace the secondary node for DRBD 8.
11004
11005     The algorithm for replace is quite complicated:
11006       - for all disks of the instance:
11007         - create new LVs on the new node with same names
11008         - shutdown the drbd device on the old secondary
11009         - disconnect the drbd network on the primary
11010         - create the drbd device on the new secondary
11011         - network attach the drbd on the primary, using an artifice:
11012           the drbd code for Attach() will connect to the network if it
11013           finds a device which is connected to the good local disks but
11014           not network enabled
11015       - wait for sync across all devices
11016       - remove all disks from the old secondary
11017
11018     Failures are not very well handled.
11019
11020     """
11021     steps_total = 6
11022
11023     pnode = self.instance.primary_node
11024
11025     # Step: check device activation
11026     self.lu.LogStep(1, steps_total, "Check device existence")
11027     self._CheckDisksExistence([self.instance.primary_node])
11028     self._CheckVolumeGroup([self.instance.primary_node])
11029
11030     # Step: check other node consistency
11031     self.lu.LogStep(2, steps_total, "Check peer consistency")
11032     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11033
11034     # Step: create new storage
11035     self.lu.LogStep(3, steps_total, "Allocate new storage")
11036     for idx, dev in enumerate(self.instance.disks):
11037       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11038                       (self.new_node, idx))
11039       # we pass force_create=True to force LVM creation
11040       for new_lv in dev.children:
11041         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11042                         _GetInstanceInfoText(self.instance), False)
11043
11044     # Step 4: dbrd minors and drbd setups changes
11045     # after this, we must manually remove the drbd minors on both the
11046     # error and the success paths
11047     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11048     minors = self.cfg.AllocateDRBDMinor([self.new_node
11049                                          for dev in self.instance.disks],
11050                                         self.instance.name)
11051     logging.debug("Allocated minors %r", minors)
11052
11053     iv_names = {}
11054     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11055       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11056                       (self.new_node, idx))
11057       # create new devices on new_node; note that we create two IDs:
11058       # one without port, so the drbd will be activated without
11059       # networking information on the new node at this stage, and one
11060       # with network, for the latter activation in step 4
11061       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11062       if self.instance.primary_node == o_node1:
11063         p_minor = o_minor1
11064       else:
11065         assert self.instance.primary_node == o_node2, "Three-node instance?"
11066         p_minor = o_minor2
11067
11068       new_alone_id = (self.instance.primary_node, self.new_node, None,
11069                       p_minor, new_minor, o_secret)
11070       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11071                     p_minor, new_minor, o_secret)
11072
11073       iv_names[idx] = (dev, dev.children, new_net_id)
11074       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11075                     new_net_id)
11076       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11077       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11078                               logical_id=new_alone_id,
11079                               children=dev.children,
11080                               size=dev.size,
11081                               params=drbd_params)
11082       try:
11083         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11084                               _GetInstanceInfoText(self.instance), False)
11085       except errors.GenericError:
11086         self.cfg.ReleaseDRBDMinors(self.instance.name)
11087         raise
11088
11089     # We have new devices, shutdown the drbd on the old secondary
11090     for idx, dev in enumerate(self.instance.disks):
11091       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11092       self.cfg.SetDiskID(dev, self.target_node)
11093       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11094       if msg:
11095         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11096                            "node: %s" % (idx, msg),
11097                            hint=("Please cleanup this device manually as"
11098                                  " soon as possible"))
11099
11100     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11101     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11102                                                self.instance.disks)[pnode]
11103
11104     msg = result.fail_msg
11105     if msg:
11106       # detaches didn't succeed (unlikely)
11107       self.cfg.ReleaseDRBDMinors(self.instance.name)
11108       raise errors.OpExecError("Can't detach the disks from the network on"
11109                                " old node: %s" % (msg,))
11110
11111     # if we managed to detach at least one, we update all the disks of
11112     # the instance to point to the new secondary
11113     self.lu.LogInfo("Updating instance configuration")
11114     for dev, _, new_logical_id in iv_names.itervalues():
11115       dev.logical_id = new_logical_id
11116       self.cfg.SetDiskID(dev, self.instance.primary_node)
11117
11118     self.cfg.Update(self.instance, feedback_fn)
11119
11120     # Release all node locks (the configuration has been updated)
11121     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11122
11123     # and now perform the drbd attach
11124     self.lu.LogInfo("Attaching primary drbds to new secondary"
11125                     " (standalone => connected)")
11126     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11127                                             self.new_node],
11128                                            self.node_secondary_ip,
11129                                            self.instance.disks,
11130                                            self.instance.name,
11131                                            False)
11132     for to_node, to_result in result.items():
11133       msg = to_result.fail_msg
11134       if msg:
11135         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11136                            to_node, msg,
11137                            hint=("please do a gnt-instance info to see the"
11138                                  " status of disks"))
11139
11140     cstep = itertools.count(5)
11141
11142     if self.early_release:
11143       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11144       self._RemoveOldStorage(self.target_node, iv_names)
11145       # TODO: Check if releasing locks early still makes sense
11146       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11147     else:
11148       # Release all resource locks except those used by the instance
11149       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11150                     keep=self.node_secondary_ip.keys())
11151
11152     # TODO: Can the instance lock be downgraded here? Take the optional disk
11153     # shutdown in the caller into consideration.
11154
11155     # Wait for sync
11156     # This can fail as the old devices are degraded and _WaitForSync
11157     # does a combined result over all disks, so we don't check its return value
11158     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11159     _WaitForSync(self.lu, self.instance)
11160
11161     # Check all devices manually
11162     self._CheckDevices(self.instance.primary_node, iv_names)
11163
11164     # Step: remove old storage
11165     if not self.early_release:
11166       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11167       self._RemoveOldStorage(self.target_node, iv_names)
11168
11169
11170 class LURepairNodeStorage(NoHooksLU):
11171   """Repairs the volume group on a node.
11172
11173   """
11174   REQ_BGL = False
11175
11176   def CheckArguments(self):
11177     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11178
11179     storage_type = self.op.storage_type
11180
11181     if (constants.SO_FIX_CONSISTENCY not in
11182         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11183       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11184                                  " repaired" % storage_type,
11185                                  errors.ECODE_INVAL)
11186
11187   def ExpandNames(self):
11188     self.needed_locks = {
11189       locking.LEVEL_NODE: [self.op.node_name],
11190       }
11191
11192   def _CheckFaultyDisks(self, instance, node_name):
11193     """Ensure faulty disks abort the opcode or at least warn."""
11194     try:
11195       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11196                                   node_name, True):
11197         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11198                                    " node '%s'" % (instance.name, node_name),
11199                                    errors.ECODE_STATE)
11200     except errors.OpPrereqError, err:
11201       if self.op.ignore_consistency:
11202         self.proc.LogWarning(str(err.args[0]))
11203       else:
11204         raise
11205
11206   def CheckPrereq(self):
11207     """Check prerequisites.
11208
11209     """
11210     # Check whether any instance on this node has faulty disks
11211     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11212       if inst.admin_state != constants.ADMINST_UP:
11213         continue
11214       check_nodes = set(inst.all_nodes)
11215       check_nodes.discard(self.op.node_name)
11216       for inst_node_name in check_nodes:
11217         self._CheckFaultyDisks(inst, inst_node_name)
11218
11219   def Exec(self, feedback_fn):
11220     feedback_fn("Repairing storage unit '%s' on %s ..." %
11221                 (self.op.name, self.op.node_name))
11222
11223     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11224     result = self.rpc.call_storage_execute(self.op.node_name,
11225                                            self.op.storage_type, st_args,
11226                                            self.op.name,
11227                                            constants.SO_FIX_CONSISTENCY)
11228     result.Raise("Failed to repair storage unit '%s' on %s" %
11229                  (self.op.name, self.op.node_name))
11230
11231
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  The logical unit does not move anything itself; it computes the jobs
  needed for the evacuation and returns them for submission.

  """
  REQ_BGL = False

  # Maps the opcode's evacuation mode to the iallocator's evacuation mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Check the iallocator/remote node arguments.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and declare the (initially empty) lock sets.

    @raise errors.OpPrereqError: if the remote node is the evacuated
        node itself, or if a remote node is given for a mode other than
        secondary evacuation

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the explicit
        remote node or, when none was given, the nodes returned by the
        configuration's group-membership lookup for the evacuated node

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: if all instances (primary and
        secondary) are to be evacuated at once, which is not supported

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and the remote node.

    @raise errors.OpPrereqError: if the needed nodes are no longer
        covered by the owned node locks, or if the remote node is the
        primary node of an instance to be evacuated
    @raise errors.OpExecError: if the node groups or the instances on
        the node changed since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Compute the evacuation jobs.

    @return: L{ResultWithJobs} wrapping one list of opcodes per job;
        the job list is empty if there is nothing to evacuate
    @raise errors.OpPrereqError: if the iallocator run was not
        successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance, changing its secondary node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11429
11430
11431 def _SetOpEarlyRelease(early_release, op):
11432   """Sets C{early_release} flag on opcodes if available.
11433
11434   """
11435   try:
11436     op.early_release = early_release
11437   except AttributeError:
11438     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11439
11440   return op
11441
11442
11443 def _NodeEvacDest(use_nodes, group, nodes):
11444   """Returns group or nodes depending on caller's choice.
11445
11446   """
11447   if use_nodes:
11448     return utils.CommaJoin(nodes)
11449   else:
11450     return group
11451
11452
11453 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11454   """Unpacks the result of change-group and node-evacuate iallocator requests.
11455
11456   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11457   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11458
11459   @type lu: L{LogicalUnit}
11460   @param lu: Logical unit instance
11461   @type alloc_result: tuple/list
11462   @param alloc_result: Result from iallocator
11463   @type early_release: bool
11464   @param early_release: Whether to release locks early if possible
11465   @type use_nodes: bool
11466   @param use_nodes: Whether to display node names instead of groups
11467
11468   """
11469   (moved, failed, jobs) = alloc_result
11470
11471   if failed:
11472     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11473                                  for (name, reason) in failed)
11474     lu.LogWarning("Unable to evacuate instances %s", failreason)
11475     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11476
11477   if moved:
11478     lu.LogInfo("Instances to be moved: %s",
11479                utils.CommaJoin("%s (to %s)" %
11480                                (name, _NodeEvacDest(use_nodes, group, nodes))
11481                                for (name, group, nodes) in moved))
11482
11483   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11484               map(opcodes.OpCode.LoadOpCode, ops))
11485           for ops in jobs]
11486
11487
11488 class LUInstanceGrowDisk(LogicalUnit):
11489   """Grow a disk of an instance.
11490
11491   """
11492   HPATH = "disk-grow"
11493   HTYPE = constants.HTYPE_INSTANCE
11494   REQ_BGL = False
11495
11496   def ExpandNames(self):
11497     self._ExpandAndLockInstance()
11498     self.needed_locks[locking.LEVEL_NODE] = []
11499     self.needed_locks[locking.LEVEL_NODE_RES] = []
11500     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11501     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11502
11503   def DeclareLocks(self, level):
11504     if level == locking.LEVEL_NODE:
11505       self._LockInstancesNodes()
11506     elif level == locking.LEVEL_NODE_RES:
11507       # Copy node locks
11508       self.needed_locks[locking.LEVEL_NODE_RES] = \
11509         self.needed_locks[locking.LEVEL_NODE][:]
11510
11511   def BuildHooksEnv(self):
11512     """Build hooks env.
11513
11514     This runs on the master, the primary and all the secondaries.
11515
11516     """
11517     env = {
11518       "DISK": self.op.disk,
11519       "AMOUNT": self.op.amount,
11520       }
11521     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11522     return env
11523
11524   def BuildHooksNodes(self):
11525     """Build hooks nodes.
11526
11527     """
11528     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11529     return (nl, nl)
11530
11531   def CheckPrereq(self):
11532     """Check prerequisites.
11533
11534     This checks that the instance is in the cluster.
11535
11536     """
11537     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11538     assert instance is not None, \
11539       "Cannot retrieve locked instance %s" % self.op.instance_name
11540     nodenames = list(instance.all_nodes)
11541     for node in nodenames:
11542       _CheckNodeOnline(self, node)
11543
11544     self.instance = instance
11545
11546     if instance.disk_template not in constants.DTS_GROWABLE:
11547       raise errors.OpPrereqError("Instance's disk layout does not support"
11548                                  " growing", errors.ECODE_INVAL)
11549
11550     self.disk = instance.FindDisk(self.op.disk)
11551
11552     if instance.disk_template not in (constants.DT_FILE,
11553                                       constants.DT_SHARED_FILE,
11554                                       constants.DT_RBD):
11555       # TODO: check the free disk space for file, when that feature will be
11556       # supported
11557       _CheckNodesFreeDiskPerVG(self, nodenames,
11558                                self.disk.ComputeGrowth(self.op.amount))
11559
  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is assembled, the grow request is sent to every node of the
    instance first in dry-run mode and then for real, and the new size is
    recorded in the configuration. Afterwards the node locks are released,
    the instance lock is downgraded and, if requested via the opcode's
    C{wait_for_sync}, the LU waits for the disk to sync.

    @param feedback_fn: Callable used to report progress; also passed on to
      the configuration update
    @raise errors.OpExecError: if the disk cannot be assembled

    """
    instance = self.instance
    disk = self.disk

    # Sanity checks: exactly this instance is locked and the node and node
    # resource lock sets are the same
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    # Only the disk to be grown is assembled, not all of the instance's disks
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    # Record the new size in the disk object and write the instance back to
    # the configuration
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # The disk was assembled only for growing; shut it down again if the
      # instance is not marked as up
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    # Node resource locks must still be held at this point, and the instance
    # lock set must be unchanged
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11622
11623
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determines the wanted instances and the locks to acquire.

    Locking is forced (with a warning) whenever non-static data is requested
    without locking.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Locks the instances' nodes, but only when locking is in use.

    """
    if level == locking.LEVEL_NODE and self.op.use_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    name_info_pairs = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = map(compat.snd, name_info_pairs)

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, when no node is given, when the RPC
      result is marked offline or when the payload is None; otherwise a
      tuple of (dev_path, major, minor, sync_percent, estimated_time,
      is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    rpc_result = self.rpc.call_blockdev_find(node, dev)
    if rpc_result.offline:
      return None

    rpc_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = rpc_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Recurses into the children of C{dev}.

    @rtype: dict

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      child_status = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      child_status = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def _ComputeRunState(self, instance, pnode):
    """Computes the run state reported for an instance.

    @return: None for static queries or an offline primary node, "up" if the
      node returns instance information containing a "state" key, otherwise
      "down" for instances configured as up or the configured admin state

    """
    if self.op.static or pnode.offline:
      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
      return None

    info = self.rpc.call_instance_info(instance.primary_node,
                                       instance.name,
                                       instance.hypervisor)
    info.Raise("Error checking node %s" % instance.primary_node)
    payload = info.payload

    if payload and "state" in payload:
      return "up"

    if instance.admin_state == constants.ADMINST_UP:
      return "down"

    return instance.admin_state

  def Exec(self, feedback_fn):
    """Gather and return data"""
    data = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(inst.primary_node
                                          for inst in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      remote_state = self._ComputeRunState(instance, pnode)

      disks = [self._ComputeDiskStatus(instance, None, disk)
               for disk in instance.disks]

      data[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return data
11788
11789
def PrepareContainerMods(mods, private_fn):
  """Extends each container modification with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; called once per modification, without arguments. If None,
    the private data field is None
  @rtype: list
  @return: List of (operation, index, parameters, private data) tuples, in
    the order of C{mods}

  """
  def _NewPrivate():
    """Builds the private data object for one modification.

    """
    if private_fn is None:
      return None
    return private_fn()

  prepared = []
  for (op, idx, params) in mods:
    prepared.append((op, idx, params, _NewPrivate()))

  return prepared
11807
11808
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: either C{None} or a list of two-element items, the first being
#: a non-empty string and the second an arbitrary value
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(ht.TIsLength(2),
          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11816
11817
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  The container is modified in place, one modification after the other, so
  indices of later modifications refer to the already modified container.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes; extended in place unless None
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if None, the parameters themselves are used as the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: for negative indices other than -1 and for indices
    outside of the container
  @raise errors.ProgrammerError: for unknown operations

  """
  for (op, idx, params, private) in mods:
    size = len(container)

    # Translate the requested index into an absolute one
    if idx == -1:
      # Append
      absidx = size - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > size:
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, size))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      appending = (idx == -1)

      # Calculate where item will be added
      if appending:
        addidx = size
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if appending:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)

    else:
      # All other operations work on an existing item, which is looked up
      # before the operation itself is checked
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)

      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]

      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
11905
11906
11907 def _UpdateIvNames(base_index, disks):
11908   """Updates the C{iv_name} attribute of disks.
11909
11910   @type disks: list of L{objects.Disk}
11911
11912   """
11913   for (idx, disk) in enumerate(disks):
11914     disk.iv_name = "disk/%s" % (base_index + idx, )
11915
11916
11917 class _InstNicModPrivate:
11918   """Data structure for network interface modifications.
11919
11920   Used by L{LUInstanceSetParams}.
11921
11922   """
11923   def __init__(self):
11924     self.params = None
11925     self.filled = None
11926
11927
11928 class LUInstanceSetParams(LogicalUnit):
11929   """Modifies an instance's parameters.
11930
11931   """
11932   HPATH = "instance-modify"
11933   HTYPE = constants.HTYPE_INSTANCE
11934   REQ_BGL = False
11935
11936   @staticmethod
11937   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11938     assert ht.TList(mods)
11939     assert not mods or len(mods[0]) in (2, 3)
11940
11941     if mods and len(mods[0]) == 2:
11942       result = []
11943
11944       addremove = 0
11945       for op, params in mods:
11946         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11947           result.append((op, -1, params))
11948           addremove += 1
11949
11950           if addremove > 1:
11951             raise errors.OpPrereqError("Only one %s add or remove operation is"
11952                                        " supported at a time" % kind,
11953                                        errors.ECODE_INVAL)
11954         else:
11955           result.append((constants.DDM_MODIFY, op, params))
11956
11957       assert verify_fn(result)
11958     else:
11959       result = mods
11960
11961     return result
11962
11963   @staticmethod
11964   def _CheckMods(kind, mods, key_types, item_fn):
11965     """Ensures requested disk/NIC modifications are valid.
11966
11967     """
11968     for (op, _, params) in mods:
11969       assert ht.TDict(params)
11970
11971       utils.ForceDictType(params, key_types)
11972
11973       if op == constants.DDM_REMOVE:
11974         if params:
11975           raise errors.OpPrereqError("No settings should be passed when"
11976                                      " removing a %s" % kind,
11977                                      errors.ECODE_INVAL)
11978       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11979         item_fn(op, params)
11980       else:
11981         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11982
11983   @staticmethod
11984   def _VerifyDiskModification(op, params):
11985     """Verifies a disk modification.
11986
11987     """
11988     if op == constants.DDM_ADD:
11989       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11990       if mode not in constants.DISK_ACCESS_SET:
11991         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11992                                    errors.ECODE_INVAL)
11993
11994       size = params.get(constants.IDISK_SIZE, None)
11995       if size is None:
11996         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11997                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
11998
11999       try:
12000         size = int(size)
12001       except (TypeError, ValueError), err:
12002         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12003                                    errors.ECODE_INVAL)
12004
12005       params[constants.IDISK_SIZE] = size
12006
12007     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12008       raise errors.OpPrereqError("Disk size change not possible, use"
12009                                  " grow-disk", errors.ECODE_INVAL)
12010
12011   @staticmethod
12012   def _VerifyNicModification(op, params):
12013     """Verifies a network interface modification.
12014
12015     """
12016     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12017       ip = params.get(constants.INIC_IP, None)
12018       if ip is None:
12019         pass
12020       elif ip.lower() == constants.VALUE_NONE:
12021         params[constants.INIC_IP] = None
12022       elif not netutils.IPAddress.IsValid(ip):
12023         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12024                                    errors.ECODE_INVAL)
12025
12026       bridge = params.get("bridge", None)
12027       link = params.get(constants.INIC_LINK, None)
12028       if bridge and link:
12029         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12030                                    " at the same time", errors.ECODE_INVAL)
12031       elif bridge and bridge.lower() == constants.VALUE_NONE:
12032         params["bridge"] = None
12033       elif link and link.lower() == constants.VALUE_NONE:
12034         params[constants.INIC_LINK] = None
12035
12036       if op == constants.DDM_ADD:
12037         macaddr = params.get(constants.INIC_MAC, None)
12038         if macaddr is None:
12039           params[constants.INIC_MAC] = constants.VALUE_AUTO
12040
12041       if constants.INIC_MAC in params:
12042         macaddr = params[constants.INIC_MAC]
12043         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12044           macaddr = utils.NormalizeAndValidateMac(macaddr)
12045
12046         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12047           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12048                                      " modifying an existing NIC",
12049                                      errors.ECODE_INVAL)
12050
12051   def CheckArguments(self):
12052     if not (self.op.nics or self.op.disks or self.op.disk_template or
12053             self.op.hvparams or self.op.beparams or self.op.os_name or
12054             self.op.offline is not None or self.op.runtime_mem):
12055       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12056
12057     if self.op.hvparams:
12058       _CheckGlobalHvParams(self.op.hvparams)
12059
12060     self.op.disks = \
12061       self._UpgradeDiskNicMods("disk", self.op.disks,
12062         opcodes.OpInstanceSetParams.TestDiskModifications)
12063     self.op.nics = \
12064       self._UpgradeDiskNicMods("NIC", self.op.nics,
12065         opcodes.OpInstanceSetParams.TestNicModifications)
12066
12067     # Check disk modifications
12068     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12069                     self._VerifyDiskModification)
12070
12071     if self.op.disks and self.op.disk_template is not None:
12072       raise errors.OpPrereqError("Disk template conversion and other disk"
12073                                  " changes not supported at the same time",
12074                                  errors.ECODE_INVAL)
12075
12076     if (self.op.disk_template and
12077         self.op.disk_template in constants.DTS_INT_MIRROR and
12078         self.op.remote_node is None):
12079       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12080                                  " one requires specifying a secondary node",
12081                                  errors.ECODE_INVAL)
12082
12083     # Check NIC modifications
12084     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12085                     self._VerifyNicModification)
12086
12087   def ExpandNames(self):
12088     self._ExpandAndLockInstance()
12089     # Can't even acquire node locks in shared mode as upcoming changes in
12090     # Ganeti 2.6 will start to modify the node object on disk conversion
12091     self.needed_locks[locking.LEVEL_NODE] = []
12092     self.needed_locks[locking.LEVEL_NODE_RES] = []
12093     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12094
12095   def DeclareLocks(self, level):
12096     # TODO: Acquire group lock in shared mode (disk parameters)
12097     if level == locking.LEVEL_NODE:
12098       self._LockInstancesNodes()
12099       if self.op.disk_template and self.op.remote_node:
12100         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12101         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12102     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12103       # Copy node locks
12104       self.needed_locks[locking.LEVEL_NODE_RES] = \
12105         self.needed_locks[locking.LEVEL_NODE][:]
12106
12107   def BuildHooksEnv(self):
12108     """Build hooks env.
12109
12110     This runs on the master, primary and secondaries.
12111
12112     """
12113     args = dict()
12114     if constants.BE_MINMEM in self.be_new:
12115       args["minmem"] = self.be_new[constants.BE_MINMEM]
12116     if constants.BE_MAXMEM in self.be_new:
12117       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12118     if constants.BE_VCPUS in self.be_new:
12119       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12120     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12121     # information at all.
12122
12123     if self._new_nics is not None:
12124       nics = []
12125
12126       for nic in self._new_nics:
12127         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12128         mode = nicparams[constants.NIC_MODE]
12129         link = nicparams[constants.NIC_LINK]
12130         nics.append((nic.ip, nic.mac, mode, link))
12131
12132       args["nics"] = nics
12133
12134     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12135     if self.op.disk_template:
12136       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12137     if self.op.runtime_mem:
12138       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12139
12140     return env
12141
12142   def BuildHooksNodes(self):
12143     """Build hooks nodes.
12144
12145     """
12146     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12147     return (nl, nl)
12148
12149   def _PrepareNicModification(self, params, private, old_ip, old_params,
12150                               cluster, pnode):
12151     update_params_dict = dict([(key, params[key])
12152                                for key in constants.NICS_PARAMETERS
12153                                if key in params])
12154
12155     if "bridge" in params:
12156       update_params_dict[constants.NIC_LINK] = params["bridge"]
12157
12158     new_params = _GetUpdatedParams(old_params, update_params_dict)
12159     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12160
12161     new_filled_params = cluster.SimpleFillNIC(new_params)
12162     objects.NIC.CheckParameterSyntax(new_filled_params)
12163
12164     new_mode = new_filled_params[constants.NIC_MODE]
12165     if new_mode == constants.NIC_MODE_BRIDGED:
12166       bridge = new_filled_params[constants.NIC_LINK]
12167       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12168       if msg:
12169         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12170         if self.op.force:
12171           self.warn.append(msg)
12172         else:
12173           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12174
12175     elif new_mode == constants.NIC_MODE_ROUTED:
12176       ip = params.get(constants.INIC_IP, old_ip)
12177       if ip is None:
12178         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12179                                    " on a routed NIC", errors.ECODE_INVAL)
12180
12181     if constants.INIC_MAC in params:
12182       mac = params[constants.INIC_MAC]
12183       if mac is None:
12184         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12185                                    errors.ECODE_INVAL)
12186       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12187         # otherwise generate the MAC address
12188         params[constants.INIC_MAC] = \
12189           self.cfg.GenerateMAC(self.proc.GetECId())
12190       else:
12191         # or validate/reserve the current one
12192         try:
12193           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12194         except errors.ReservationError:
12195           raise errors.OpPrereqError("MAC address '%s' already in use"
12196                                      " in cluster" % mac,
12197                                      errors.ECODE_NOTUNIQUE)
12198
12199     private.params = new_params
12200     private.filled = new_filled_params
12201
12202     return (None, None)
12203
12204   def CheckPrereq(self):
12205     """Check prerequisites.
12206
12207     This only checks the instance list against the existing names.
12208
12209     """
12210     # checking the new params on the primary/secondary nodes
12211
12212     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12213     cluster = self.cluster = self.cfg.GetClusterInfo()
12214     assert self.instance is not None, \
12215       "Cannot retrieve locked instance %s" % self.op.instance_name
12216     pnode = instance.primary_node
12217     nodelist = list(instance.all_nodes)
12218     pnode_info = self.cfg.GetNodeInfo(pnode)
12219     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12220
12221     # Prepare disk/NIC modifications
12222     self.diskmod = PrepareContainerMods(self.op.disks, None)
12223     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12224
12225     # OS change
12226     if self.op.os_name and not self.op.force:
12227       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12228                       self.op.force_variant)
12229       instance_os = self.op.os_name
12230     else:
12231       instance_os = instance.os
12232
12233     assert not (self.op.disk_template and self.op.disks), \
12234       "Can't modify disk template and apply disk changes at the same time"
12235
12236     if self.op.disk_template:
12237       if instance.disk_template == self.op.disk_template:
12238         raise errors.OpPrereqError("Instance already has disk template %s" %
12239                                    instance.disk_template, errors.ECODE_INVAL)
12240
12241       if (instance.disk_template,
12242           self.op.disk_template) not in self._DISK_CONVERSIONS:
12243         raise errors.OpPrereqError("Unsupported disk template conversion from"
12244                                    " %s to %s" % (instance.disk_template,
12245                                                   self.op.disk_template),
12246                                    errors.ECODE_INVAL)
12247       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12248                           msg="cannot change disk template")
12249       if self.op.disk_template in constants.DTS_INT_MIRROR:
12250         if self.op.remote_node == pnode:
12251           raise errors.OpPrereqError("Given new secondary node %s is the same"
12252                                      " as the primary node of the instance" %
12253                                      self.op.remote_node, errors.ECODE_STATE)
12254         _CheckNodeOnline(self, self.op.remote_node)
12255         _CheckNodeNotDrained(self, self.op.remote_node)
12256         # FIXME: here we assume that the old instance type is DT_PLAIN
12257         assert instance.disk_template == constants.DT_PLAIN
12258         disks = [{constants.IDISK_SIZE: d.size,
12259                   constants.IDISK_VG: d.logical_id[0]}
12260                  for d in instance.disks]
12261         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12262         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12263
12264         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12265         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12266         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12267         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12268                                 ignore=self.op.ignore_ipolicy)
12269         if pnode_info.group != snode_info.group:
12270           self.LogWarning("The primary and secondary nodes are in two"
12271                           " different node groups; the disk parameters"
12272                           " from the first disk's node group will be"
12273                           " used")
12274
12275     # hvparams processing
12276     if self.op.hvparams:
12277       hv_type = instance.hypervisor
12278       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12279       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12280       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12281
12282       # local check
12283       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12284       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12285       self.hv_proposed = self.hv_new = hv_new # the new actual values
12286       self.hv_inst = i_hvdict # the new dict (without defaults)
12287     else:
12288       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12289                                               instance.hvparams)
12290       self.hv_new = self.hv_inst = {}
12291
12292     # beparams processing
12293     if self.op.beparams:
12294       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12295                                    use_none=True)
12296       objects.UpgradeBeParams(i_bedict)
12297       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12298       be_new = cluster.SimpleFillBE(i_bedict)
12299       self.be_proposed = self.be_new = be_new # the new actual values
12300       self.be_inst = i_bedict # the new dict (without defaults)
12301     else:
12302       self.be_new = self.be_inst = {}
12303       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12304     be_old = cluster.FillBE(instance)
12305
12306     # CPU param validation -- checking every time a paramtere is
12307     # changed to cover all cases where either CPU mask or vcpus have
12308     # changed
12309     if (constants.BE_VCPUS in self.be_proposed and
12310         constants.HV_CPU_MASK in self.hv_proposed):
12311       cpu_list = \
12312         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12313       # Verify mask is consistent with number of vCPUs. Can skip this
12314       # test if only 1 entry in the CPU mask, which means same mask
12315       # is applied to all vCPUs.
12316       if (len(cpu_list) > 1 and
12317           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12318         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12319                                    " CPU mask [%s]" %
12320                                    (self.be_proposed[constants.BE_VCPUS],
12321                                     self.hv_proposed[constants.HV_CPU_MASK]),
12322                                    errors.ECODE_INVAL)
12323
12324       # Only perform this test if a new CPU mask is given
12325       if constants.HV_CPU_MASK in self.hv_new:
12326         # Calculate the largest CPU number requested
12327         max_requested_cpu = max(map(max, cpu_list))
12328         # Check that all of the instance's nodes have enough physical CPUs to
12329         # satisfy the requested CPU mask
12330         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12331                                 max_requested_cpu + 1, instance.hypervisor)
12332
12333     # osparams processing
12334     if self.op.osparams:
12335       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12336       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12337       self.os_inst = i_osdict # the new dict (without defaults)
12338     else:
12339       self.os_inst = {}
12340
12341     self.warn = []
12342
12343     #TODO(dynmem): do the appropriate check involving MINMEM
12344     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12345         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12346       mem_check_list = [pnode]
12347       if be_new[constants.BE_AUTO_BALANCE]:
12348         # either we changed auto_balance to yes or it was from before
12349         mem_check_list.extend(instance.secondary_nodes)
12350       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12351                                                   instance.hypervisor)
12352       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12353                                          [instance.hypervisor])
12354       pninfo = nodeinfo[pnode]
12355       msg = pninfo.fail_msg
12356       if msg:
12357         # Assume the primary node is unreachable and go ahead
12358         self.warn.append("Can't get info from primary node %s: %s" %
12359                          (pnode, msg))
12360       else:
12361         (_, _, (pnhvinfo, )) = pninfo.payload
12362         if not isinstance(pnhvinfo.get("memory_free", None), int):
12363           self.warn.append("Node data from primary node %s doesn't contain"
12364                            " free memory information" % pnode)
12365         elif instance_info.fail_msg:
12366           self.warn.append("Can't get instance runtime information: %s" %
12367                           instance_info.fail_msg)
12368         else:
12369           if instance_info.payload:
12370             current_mem = int(instance_info.payload["memory"])
12371           else:
12372             # Assume instance not running
12373             # (there is a slight race condition here, but it's not very
12374             # probable, and we have no other way to check)
12375             # TODO: Describe race condition
12376             current_mem = 0
12377           #TODO(dynmem): do the appropriate check involving MINMEM
12378           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12379                       pnhvinfo["memory_free"])
12380           if miss_mem > 0:
12381             raise errors.OpPrereqError("This change will prevent the instance"
12382                                        " from starting, due to %d MB of memory"
12383                                        " missing on its primary node" %
12384                                        miss_mem,
12385                                        errors.ECODE_NORES)
12386
12387       if be_new[constants.BE_AUTO_BALANCE]:
12388         for node, nres in nodeinfo.items():
12389           if node not in instance.secondary_nodes:
12390             continue
12391           nres.Raise("Can't get info from secondary node %s" % node,
12392                      prereq=True, ecode=errors.ECODE_STATE)
12393           (_, _, (nhvinfo, )) = nres.payload
12394           if not isinstance(nhvinfo.get("memory_free", None), int):
12395             raise errors.OpPrereqError("Secondary node %s didn't return free"
12396                                        " memory information" % node,
12397                                        errors.ECODE_STATE)
12398           #TODO(dynmem): do the appropriate check involving MINMEM
12399           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12400             raise errors.OpPrereqError("This change will prevent the instance"
12401                                        " from failover to its secondary node"
12402                                        " %s, due to not enough memory" % node,
12403                                        errors.ECODE_STATE)
12404
12405     if self.op.runtime_mem:
12406       remote_info = self.rpc.call_instance_info(instance.primary_node,
12407                                                 instance.name,
12408                                                 instance.hypervisor)
12409       remote_info.Raise("Error checking node %s" % instance.primary_node)
12410       if not remote_info.payload: # not running already
12411         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12412                                    errors.ECODE_STATE)
12413
12414       current_memory = remote_info.payload["memory"]
12415       if (not self.op.force and
12416            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12417             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12418         raise errors.OpPrereqError("Instance %s must have memory between %d"
12419                                    " and %d MB of memory unless --force is"
12420                                    " given" % (instance.name,
12421                                     self.be_proposed[constants.BE_MINMEM],
12422                                     self.be_proposed[constants.BE_MAXMEM]),
12423                                    errors.ECODE_INVAL)
12424
12425       if self.op.runtime_mem > current_memory:
12426         _CheckNodeFreeMemory(self, instance.primary_node,
12427                              "ballooning memory for instance %s" %
12428                              instance.name,
12429                              self.op.memory - current_memory,
12430                              instance.hypervisor)
12431
12432     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12433       raise errors.OpPrereqError("Disk operations not supported for"
12434                                  " diskless instances",
12435                                  errors.ECODE_INVAL)
12436
12437     def _PrepareNicCreate(_, params, private):
12438       return self._PrepareNicModification(params, private, None, {},
12439                                           cluster, pnode)
12440
12441     def _PrepareNicMod(_, nic, params, private):
12442       return self._PrepareNicModification(params, private, nic.ip,
12443                                           nic.nicparams, cluster, pnode)
12444
12445     # Verify NIC changes (operating on copy)
12446     nics = instance.nics[:]
12447     ApplyContainerMods("NIC", nics, None, self.nicmod,
12448                        _PrepareNicCreate, _PrepareNicMod, None)
12449     if len(nics) > constants.MAX_NICS:
12450       raise errors.OpPrereqError("Instance has too many network interfaces"
12451                                  " (%d), cannot add more" % constants.MAX_NICS,
12452                                  errors.ECODE_STATE)
12453
12454     # Verify disk changes (operating on a copy)
12455     disks = instance.disks[:]
12456     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12457     if len(disks) > constants.MAX_DISKS:
12458       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12459                                  " more" % constants.MAX_DISKS,
12460                                  errors.ECODE_STATE)
12461
12462     if self.op.offline is not None:
12463       if self.op.offline:
12464         msg = "can't change to offline"
12465       else:
12466         msg = "can't change to online"
12467       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12468
12469     # Pre-compute NIC changes (necessary to use result in hooks)
12470     self._nic_chgdesc = []
12471     if self.nicmod:
12472       # Operate on copies as this is still in prereq
12473       nics = [nic.Copy() for nic in instance.nics]
12474       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12475                          self._CreateNewNic, self._ApplyNicMods, None)
12476       self._new_nics = nics
12477     else:
12478       self._new_nics = None
12479
12480   def _ConvertPlainToDrbd(self, feedback_fn):
12481     """Converts an instance from plain to drbd.
12482
12483     """
12484     feedback_fn("Converting template to drbd")
12485     instance = self.instance
12486     pnode = instance.primary_node
12487     snode = self.op.remote_node
12488
12489     assert instance.disk_template == constants.DT_PLAIN
12490
12491     # create a fake disk info for _GenerateDiskTemplate
12492     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12493                   constants.IDISK_VG: d.logical_id[0]}
12494                  for d in instance.disks]
12495     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12496                                       instance.name, pnode, [snode],
12497                                       disk_info, None, None, 0, feedback_fn,
12498                                       self.diskparams)
12499     info = _GetInstanceInfoText(instance)
12500     feedback_fn("Creating aditional volumes...")
12501     # first, create the missing data and meta devices
12502     for disk in new_disks:
12503       # unfortunately this is... not too nice
12504       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12505                             info, True)
12506       for child in disk.children:
12507         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12508     # at this stage, all new LVs have been created, we can rename the
12509     # old ones
12510     feedback_fn("Renaming original volumes...")
12511     rename_list = [(o, n.children[0].logical_id)
12512                    for (o, n) in zip(instance.disks, new_disks)]
12513     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12514     result.Raise("Failed to rename original LVs")
12515
12516     feedback_fn("Initializing DRBD devices...")
12517     # all child devices are in place, we can now create the DRBD devices
12518     for disk in new_disks:
12519       for node in [pnode, snode]:
12520         f_create = node == pnode
12521         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12522
12523     # at this point, the instance has been modified
12524     instance.disk_template = constants.DT_DRBD8
12525     instance.disks = new_disks
12526     self.cfg.Update(instance, feedback_fn)
12527
12528     # Release node locks while waiting for sync
12529     _ReleaseLocks(self, locking.LEVEL_NODE)
12530
12531     # disks are created, waiting for sync
12532     disk_abort = not _WaitForSync(self, instance,
12533                                   oneshot=not self.op.wait_for_sync)
12534     if disk_abort:
12535       raise errors.OpExecError("There are some degraded disks for"
12536                                " this instance, please cleanup manually")
12537
12538     # Node resource locks will be released by caller
12539
12540   def _ConvertDrbdToPlain(self, feedback_fn):
12541     """Converts an instance from drbd to plain.
12542
12543     """
12544     instance = self.instance
12545
12546     assert len(instance.secondary_nodes) == 1
12547     assert instance.disk_template == constants.DT_DRBD8
12548
12549     pnode = instance.primary_node
12550     snode = instance.secondary_nodes[0]
12551     feedback_fn("Converting template to plain")
12552
12553     old_disks = instance.disks
12554     new_disks = [d.children[0] for d in old_disks]
12555
12556     # copy over size and mode
12557     for parent, child in zip(old_disks, new_disks):
12558       child.size = parent.size
12559       child.mode = parent.mode
12560
12561     # update instance structure
12562     instance.disks = new_disks
12563     instance.disk_template = constants.DT_PLAIN
12564     self.cfg.Update(instance, feedback_fn)
12565
12566     # Release locks in case removing disks takes a while
12567     _ReleaseLocks(self, locking.LEVEL_NODE)
12568
12569     feedback_fn("Removing volumes on the secondary node...")
12570     for disk in old_disks:
12571       self.cfg.SetDiskID(disk, snode)
12572       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12573       if msg:
12574         self.LogWarning("Could not remove block device %s on node %s,"
12575                         " continuing anyway: %s", disk.iv_name, snode, msg)
12576
12577     feedback_fn("Removing unneeded volumes on the primary node...")
12578     for idx, disk in enumerate(old_disks):
12579       meta = disk.children[1]
12580       self.cfg.SetDiskID(meta, pnode)
12581       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12582       if msg:
12583         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12584                         " continuing anyway: %s", idx, pnode, msg)
12585
12586     # this is a DRBD disk, return its port to the pool
12587     for disk in old_disks:
12588       tcp_port = disk.logical_id[2]
12589       self.cfg.AddTcpUdpPort(tcp_port)
12590
12591     # Node resource locks will be released by caller
12592
12593   def _CreateNewDisk(self, idx, params, _):
12594     """Creates a new disk.
12595
12596     """
12597     instance = self.instance
12598
12599     # add a new disk
12600     if instance.disk_template in constants.DTS_FILEBASED:
12601       (file_driver, file_path) = instance.disks[0].logical_id
12602       file_path = os.path.dirname(file_path)
12603     else:
12604       file_driver = file_path = None
12605
12606     disk = \
12607       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12608                             instance.primary_node, instance.secondary_nodes,
12609                             [params], file_path, file_driver, idx,
12610                             self.Log, self.diskparams)[0]
12611
12612     info = _GetInstanceInfoText(instance)
12613
12614     logging.info("Creating volume %s for instance %s",
12615                  disk.iv_name, instance.name)
12616     # Note: this needs to be kept in sync with _CreateDisks
12617     #HARDCODE
12618     for node in instance.all_nodes:
12619       f_create = (node == instance.primary_node)
12620       try:
12621         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12622       except errors.OpExecError, err:
12623         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12624                         disk.iv_name, disk, node, err)
12625
12626     return (disk, [
12627       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12628       ])
12629
12630   @staticmethod
12631   def _ModifyDisk(idx, disk, params, _):
12632     """Modifies a disk.
12633
12634     """
12635     disk.mode = params[constants.IDISK_MODE]
12636
12637     return [
12638       ("disk.mode/%d" % idx, disk.mode),
12639       ]
12640
12641   def _RemoveDisk(self, idx, root, _):
12642     """Removes a disk.
12643
12644     """
12645     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12646       self.cfg.SetDiskID(disk, node)
12647       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12648       if msg:
12649         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12650                         " continuing anyway", idx, node, msg)
12651
12652     # if this is a DRBD disk, return its port to the pool
12653     if root.dev_type in constants.LDS_DRBD:
12654       self.cfg.AddTcpUdpPort(root.logical_id[2])
12655
12656   @staticmethod
12657   def _CreateNewNic(idx, params, private):
12658     """Creates data structure for a new network interface.
12659
12660     """
12661     mac = params[constants.INIC_MAC]
12662     ip = params.get(constants.INIC_IP, None)
12663     nicparams = private.params
12664
12665     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12666       ("nic.%d" % idx,
12667        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12668        (mac, ip, private.filled[constants.NIC_MODE],
12669        private.filled[constants.NIC_LINK])),
12670       ])
12671
12672   @staticmethod
12673   def _ApplyNicMods(idx, nic, params, private):
12674     """Modifies a network interface.
12675
12676     """
12677     changes = []
12678
12679     for key in [constants.INIC_MAC, constants.INIC_IP]:
12680       if key in params:
12681         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12682         setattr(nic, key, params[key])
12683
12684     if private.params:
12685       nic.nicparams = private.params
12686
12687       for (key, val) in params.items():
12688         changes.append(("nic.%s/%d" % (key, idx), val))
12689
12690     return changes
12691
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: runtime memory, disk
    modifications, disk template conversion, NICs, hypervisor/backend/OS
    parameters and finally the offline/online state; the instance is then
    written to the configuration.

    @param feedback_fn: function used to report warnings and progress
    @return: list of C{(name, value)} pairs describing the changes made

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be held exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # Sanity check (debug mode only): all nodes involved, including a
      # new secondary, must be locked at both node levels
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # The conversion function is looked up by (current, new) template;
      # the table holds plain functions, hence self is passed explicitly
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Whatever the failure was, release the instance's DRBD minors
        # before passing the exception on
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    # (the new NIC list and its change description were pre-computed in
    # CheckPrereq)
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Write the modified instance to the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12802
  # Supported disk template conversions: maps (current template, new
  # template) to the function doing the conversion. The values are plain
  # functions taken from the class body, so they must be called with the
  # LU passed explicitly as first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12807
12808
class LUInstanceChangeGroup(LogicalUnit):
  """Changes the node group(s) of an instance.

  The new placement is computed by an iallocator; this LU only returns
  the jobs which implement it.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and resolves the requested target groups.

    The node group and node locks are only declared here, they are
    computed in L{DeclareLocks}.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks.

    If target groups were requested, only those groups, the instance's
    current groups and their nodes are locked; otherwise all groups and
    all nodes are locked.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the locks acquired optimistically and computes the set of
    target groups (C{self.target_uuids}).

    @raise errors.OpPrereqError: if a target group is already used by the
        instance or if no target group is left

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups; they are kept as a list, which
      # doesn't support the set intersection below, hence the conversion
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The target groups are exported as a space-separated list in
    C{TARGET_GROUPS}, in addition to the instance's environment.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks are run on the master node only.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Asks the iallocator for a solution and returns the resulting jobs.

    @raise errors.OpPrereqError: if the iallocator can't compute a
        solution
    @return: L{ResultWithJobs} object with the jobs for changing the group

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
12948
12949
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares shared locks on the requested nodes.

    All nodes are locked if no nodes were given in the opcode.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes which failed to answer are mapped to
        C{False} instead

    """
    def _NodeExports(nres):
      # a failed query is reported as False instead of a payload
      if nres.fail_msg:
        return False
      return nres.payload

    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    exports = self.rpc.call_export_list(self.nodes)

    return dict((name, _NodeExports(exports[name])) for name in exports)
12984
12985
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance to be exported.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known and its primary node online; the cluster
    domain secret is loaded for later use in L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports
        a dictionary with the handshake, the signed name of the X509 key
        and the signed X509 CA

    """
    # only remote exports need any preparation
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_result = self.rpc.call_x509_cert_create(pnode,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # everything handed out is derived from the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_hmac, salt),
      "x509_ca": signed_ca,
      }
13035
13036
13037 class LUBackupExport(LogicalUnit):
13038   """Export an instance to an image in the cluster.
13039
13040   """
13041   HPATH = "instance-export"
13042   HTYPE = constants.HTYPE_INSTANCE
13043   REQ_BGL = False
13044
13045   def CheckArguments(self):
13046     """Check the arguments.
13047
13048     """
13049     self.x509_key_name = self.op.x509_key_name
13050     self.dest_x509_ca_pem = self.op.destination_x509_ca
13051
13052     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13053       if not self.x509_key_name:
13054         raise errors.OpPrereqError("Missing X509 key name for encryption",
13055                                    errors.ECODE_INVAL)
13056
13057       if not self.dest_x509_ca_pem:
13058         raise errors.OpPrereqError("Missing destination X509 CA",
13059                                    errors.ECODE_INVAL)
13060
13061   def ExpandNames(self):
13062     self._ExpandAndLockInstance()
13063
13064     # Lock all nodes for local exports
13065     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13066       # FIXME: lock only instance primary and destination node
13067       #
13068       # Sad but true, for now we have do lock all nodes, as we don't know where
13069       # the previous export might be, and in this LU we search for it and
13070       # remove it from its current node. In the future we could fix this by:
13071       #  - making a tasklet to search (share-lock all), then create the
13072       #    new one, then one to remove, after
13073       #  - removing the removal operation altogether
13074       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13075
13076   def DeclareLocks(self, level):
13077     """Last minute lock declaration."""
13078     # All nodes are locked anyway, so nothing to do here.
13079
13080   def BuildHooksEnv(self):
13081     """Build hooks env.
13082
13083     This will run on the master, primary node and target node.
13084
13085     """
13086     env = {
13087       "EXPORT_MODE": self.op.mode,
13088       "EXPORT_NODE": self.op.target_node,
13089       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13090       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13091       # TODO: Generic function for boolean env variables
13092       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13093       }
13094
13095     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13096
13097     return env
13098
13099   def BuildHooksNodes(self):
13100     """Build hooks nodes.
13101
13102     """
13103     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13104
13105     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13106       nl.append(self.op.target_node)
13107
13108     return (nl, nl)
13109
13110   def CheckPrereq(self):
13111     """Check prerequisites.
13112
13113     This checks that the instance and node names are valid.
13114
13115     """
13116     instance_name = self.op.instance_name
13117
13118     self.instance = self.cfg.GetInstanceInfo(instance_name)
13119     assert self.instance is not None, \
13120           "Cannot retrieve locked instance %s" % self.op.instance_name
13121     _CheckNodeOnline(self, self.instance.primary_node)
13122
13123     if (self.op.remove_instance and
13124         self.instance.admin_state == constants.ADMINST_UP and
13125         not self.op.shutdown):
13126       raise errors.OpPrereqError("Can not remove instance without shutting it"
13127                                  " down before")
13128
13129     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13130       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13131       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13132       assert self.dst_node is not None
13133
13134       _CheckNodeOnline(self, self.dst_node.name)
13135       _CheckNodeNotDrained(self, self.dst_node.name)
13136
13137       self._cds = None
13138       self.dest_disk_info = None
13139       self.dest_x509_ca = None
13140
13141     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13142       self.dst_node = None
13143
13144       if len(self.op.target_node) != len(self.instance.disks):
13145         raise errors.OpPrereqError(("Received destination information for %s"
13146                                     " disks, but instance %s has %s disks") %
13147                                    (len(self.op.target_node), instance_name,
13148                                     len(self.instance.disks)),
13149                                    errors.ECODE_INVAL)
13150
13151       cds = _GetClusterDomainSecret()
13152
13153       # Check X509 key name
13154       try:
13155         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13156       except (TypeError, ValueError), err:
13157         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13158
13159       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13160         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13161                                    errors.ECODE_INVAL)
13162
13163       # Load and verify CA
13164       try:
13165         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13166       except OpenSSL.crypto.Error, err:
13167         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13168                                    (err, ), errors.ECODE_INVAL)
13169
13170       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13171       if errcode is not None:
13172         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13173                                    (msg, ), errors.ECODE_INVAL)
13174
13175       self.dest_x509_ca = cert
13176
13177       # Verify target information
13178       disk_info = []
13179       for idx, disk_data in enumerate(self.op.target_node):
13180         try:
13181           (host, port, magic) = \
13182             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13183         except errors.GenericError, err:
13184           raise errors.OpPrereqError("Target info for disk %s: %s" %
13185                                      (idx, err), errors.ECODE_INVAL)
13186
13187         disk_info.append((host, port, magic))
13188
13189       assert len(disk_info) == len(self.op.target_node)
13190       self.dest_disk_info = disk_info
13191
13192     else:
13193       raise errors.ProgrammerError("Unhandled export mode %r" %
13194                                    self.op.mode)
13195
13196     # instance disk type verification
13197     # TODO: Implement export support for file-based disks
13198     for disk in self.instance.disks:
13199       if disk.dev_type == constants.LD_FILE:
13200         raise errors.OpPrereqError("Export not supported for instances with"
13201                                    " file-based disks", errors.ECODE_INVAL)
13202
13203   def _CleanupExports(self, feedback_fn):
13204     """Removes exports of current instance from all other nodes.
13205
13206     If an instance in a cluster with nodes A..D was exported to node C, its
13207     exports will be removed from the nodes A, B and D.
13208
13209     """
13210     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13211
13212     nodelist = self.cfg.GetNodeList()
13213     nodelist.remove(self.dst_node.name)
13214
13215     # on one-node clusters nodelist will be empty after the removal
13216     # if we proceed the backup would be removed because OpBackupQuery
13217     # substitutes an empty list with the full cluster node list.
13218     iname = self.instance.name
13219     if nodelist:
13220       feedback_fn("Removing old exports for instance %s" % iname)
13221       exportlist = self.rpc.call_export_list(nodelist)
13222       for node in exportlist:
13223         if exportlist[node].fail_msg:
13224           continue
13225         if iname in exportlist[node].payload:
13226           msg = self.rpc.call_export_remove(node, iname).fail_msg
13227           if msg:
13228             self.LogWarning("Could not remove older export for instance %s"
13229                             " on node %s: %s", iname, node, msg)
13230
13231   def Exec(self, feedback_fn):
13232     """Export an instance to an image in the cluster.
13233
13234     """
13235     assert self.op.mode in constants.EXPORT_MODES
13236
13237     instance = self.instance
13238     src_node = instance.primary_node
13239
13240     if self.op.shutdown:
13241       # shutdown the instance, but not the disks
13242       feedback_fn("Shutting down instance %s" % instance.name)
13243       result = self.rpc.call_instance_shutdown(src_node, instance,
13244                                                self.op.shutdown_timeout)
13245       # TODO: Maybe ignore failures if ignore_remove_failures is set
13246       result.Raise("Could not shutdown instance %s on"
13247                    " node %s" % (instance.name, src_node))
13248
13249     # set the disks ID correctly since call_instance_start needs the
13250     # correct drbd minor to create the symlinks
13251     for disk in instance.disks:
13252       self.cfg.SetDiskID(disk, src_node)
13253
13254     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13255
13256     if activate_disks:
13257       # Activate the instance disks if we'exporting a stopped instance
13258       feedback_fn("Activating disks for %s" % instance.name)
13259       _StartInstanceDisks(self, instance, None)
13260
13261     try:
13262       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13263                                                      instance)
13264
13265       helper.CreateSnapshots()
13266       try:
13267         if (self.op.shutdown and
13268             instance.admin_state == constants.ADMINST_UP and
13269             not self.op.remove_instance):
13270           assert not activate_disks
13271           feedback_fn("Starting instance %s" % instance.name)
13272           result = self.rpc.call_instance_start(src_node,
13273                                                 (instance, None, None), False)
13274           msg = result.fail_msg
13275           if msg:
13276             feedback_fn("Failed to start instance: %s" % msg)
13277             _ShutdownInstanceDisks(self, instance)
13278             raise errors.OpExecError("Could not start instance: %s" % msg)
13279
13280         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13281           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13282         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13283           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13284           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13285
13286           (key_name, _, _) = self.x509_key_name
13287
13288           dest_ca_pem = \
13289             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13290                                             self.dest_x509_ca)
13291
13292           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13293                                                      key_name, dest_ca_pem,
13294                                                      timeouts)
13295       finally:
13296         helper.Cleanup()
13297
13298       # Check for backwards compatibility
13299       assert len(dresults) == len(instance.disks)
13300       assert compat.all(isinstance(i, bool) for i in dresults), \
13301              "Not all results are boolean: %r" % dresults
13302
13303     finally:
13304       if activate_disks:
13305         feedback_fn("Deactivating disks for %s" % instance.name)
13306         _ShutdownInstanceDisks(self, instance)
13307
13308     if not (compat.all(dresults) and fin_resu):
13309       failures = []
13310       if not fin_resu:
13311         failures.append("export finalization")
13312       if not compat.all(dresults):
13313         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13314                                if not dsk)
13315         failures.append("disk export: disk(s) %s" % fdsk)
13316
13317       raise errors.OpExecError("Export failed, errors in %s" %
13318                                utils.CommaJoin(failures))
13319
13320     # At this point, the export was successful, we can cleanup/finish
13321
13322     # Remove instance if requested
13323     if self.op.remove_instance:
13324       feedback_fn("Removing instance %s" % instance.name)
13325       _RemoveInstance(self, feedback_fn, instance,
13326                       self.op.ignore_remove_failures)
13327
13328     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13329       self._CleanupExports(feedback_fn)
13330
13331     return fin_resu, dresults
13332
13333
class LUBackupRemove(NoHooksLU):
  """Deletes the exports of the named instance from all locked nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Per-node failures (listing or removing exports) are reported but do not
    abort the operation.

    @param feedback_fn: callable used to report messages to the caller

    """
    requested_name = self.op.instance_name
    instance_name = self.cfg.ExpandInstanceName(requested_name)

    # An instance that could not be found is looked for under the name that
    # was passed in; this can only match an export if it was an FQDN.
    unexpanded = not instance_name
    if unexpanded:
      instance_name = requested_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    node_exports = self.rpc.call_export_list(node_names)

    export_seen = False
    for node in node_exports:
      listing = node_exports[node]

      list_err = listing.fail_msg
      if list_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, list_err)
        continue

      if instance_name not in listing.payload:
        continue

      export_seen = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if unexpanded and not export_seen:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13379
13380
13381 class LUGroupAdd(LogicalUnit):
13382   """Logical unit for creating node groups.
13383
13384   """
13385   HPATH = "group-add"
13386   HTYPE = constants.HTYPE_GROUP
13387   REQ_BGL = False
13388
13389   def ExpandNames(self):
13390     # We need the new group's UUID here so that we can create and acquire the
13391     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13392     # that it should not check whether the UUID exists in the configuration.
13393     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13394     self.needed_locks = {}
13395     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13396
13397   def CheckPrereq(self):
13398     """Check prerequisites.
13399
13400     This checks that the given group name is not an existing node group
13401     already.
13402
13403     """
13404     try:
13405       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13406     except errors.OpPrereqError:
13407       pass
13408     else:
13409       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13410                                  " node group (UUID: %s)" %
13411                                  (self.op.group_name, existing_uuid),
13412                                  errors.ECODE_EXISTS)
13413
13414     if self.op.ndparams:
13415       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13416
13417     if self.op.hv_state:
13418       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13419     else:
13420       self.new_hv_state = None
13421
13422     if self.op.disk_state:
13423       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13424     else:
13425       self.new_disk_state = None
13426
13427     if self.op.diskparams:
13428       for templ in constants.DISK_TEMPLATES:
13429         if templ not in self.op.diskparams:
13430           self.op.diskparams[templ] = {}
13431         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13432     else:
13433       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13434
13435     if self.op.ipolicy:
13436       cluster = self.cfg.GetClusterInfo()
13437       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13438       try:
13439         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13440       except errors.ConfigurationError, err:
13441         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13442                                    errors.ECODE_INVAL)
13443
13444   def BuildHooksEnv(self):
13445     """Build hooks env.
13446
13447     """
13448     return {
13449       "GROUP_NAME": self.op.group_name,
13450       }
13451
13452   def BuildHooksNodes(self):
13453     """Build hooks nodes.
13454
13455     """
13456     mn = self.cfg.GetMasterNode()
13457     return ([mn], [mn])
13458
13459   def Exec(self, feedback_fn):
13460     """Add the node group to the cluster.
13461
13462     """
13463     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13464                                   uuid=self.group_uuid,
13465                                   alloc_policy=self.op.alloc_policy,
13466                                   ndparams=self.op.ndparams,
13467                                   diskparams=self.op.diskparams,
13468                                   ipolicy=self.op.ipolicy,
13469                                   hv_state_static=self.new_hv_state,
13470                                   disk_state_static=self.new_disk_state)
13471
13472     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13473     del self.remove_locks[locking.LEVEL_NODEGROUP]
13474
13475
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit that moves a set of nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group and node names and declares the initial locks.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # All affected nodes and groups must be locked. The nodes and the
    # *destination* group are known right away; the "source" groups can only
    # be gathered from node information later on.
    group_locks = set([self.group_uuid])
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: group_locks,
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Adds the nodes' current groups to the node group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # The groups are read without holding the group or node locks yet, so
    # the result is verified again later in the code flow.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

    group_locks.update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned group locks still match the nodes' groups and
    that the assignment does not split instances unless forced.

    @raise errors.OpExecError: if nodes changed groups in the meantime, the
      target group can't be retrieved, or instances would be newly split
      and C{force} was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: not used by this method

    """
    target = self.group_uuid
    self.cfg.AssignGroupNodes([(node_name, target)
                               for node_name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The whole series of node assignments is treated as one atomic operation;
    the result describes which instances are split across groups once all
    of the changes have been applied.

    Both newly split instances and instances that were split before and stay
    split are reported.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Assignments that keep a node in its current group are no-ops
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        previously_split_instances.add(inst.name)

      groups_after = set(changed_nodes.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
13607
13608
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolves the requested groups into a list of UUIDs.

    No locks are declared. Without any requested names all groups are
    selected, ordered by name; otherwise each entry may be either a group
    name or a group UUID.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested group does not exist

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((grp.name, grp.uuid)
                        for grp in self._all_groups.values())

    if not self.names:
      sorted_names = utils.NiceSort(name_to_uuid.keys())
      self.wanted = [name_to_uuid[name] for name in sorted_names]
      return

    # Accept names to be either names or UUIDs.
    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    self.wanted = []

    for name in self.names:
      if name in known_uuids:
        self.wanted.append(name)
      elif name in name_to_uuid:
        self.wanted.append(name_to_uuid[name])
      else:
        unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are needed.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} object; the group-to-nodes and
      group-to-instances mappings are C{None} unless requested

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs a group->[nodes] mapping and GQ_INST a group->[instances]
    # one. The first can be built from GetAllNodesInfo() alone, but the second
    # has to go through instance->node, so nodes are processed even when only
    # instance information was asked for.
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node_info in all_nodes.values():
        if node_info.group not in nodes_by_group:
          continue
        nodes_by_group[node_info.group].append(node_info.name)
        group_of_node[node_info.name] = node_info.group

      if want_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        # Instances are attributed to the group of their primary node
        for inst in all_instances.values():
          pnode = inst.primary_node
          if pnode not in group_of_node:
            continue
          instances_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group)
13686
13687
class LUGroupQuery(NoHooksLU):
  """Logical unit answering node group queries.

  All the work is delegated to a L{_GroupQuery} object built from the
  opcode's names and output fields.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query helper expand the requested names.

    """
    query_impl = self.gq
    query_impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query helper declare locks for the given level.

    """
    query_impl = self.gq
    query_impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    @param feedback_fn: not used by this method

    """
    query_impl = self.gq
    return query_impl.OldStyleQuery(self)
13706
13707
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  The new values are computed and checked in L{CheckPrereq} and only
  written to the group object in L{Exec}.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Makes sure at least one modification was requested.

    @raise errors.OpPrereqError: if all modifiable parameters are C{None}

    """
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolves the group name and declares the group and instance locks.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Fills in the instance locks with the group's instances.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired instance locks, then computes the
    new node parameters, disk parameters, hypervisor/disk state and instance
    policy for every modification that was requested. A new instance policy
    that is violated by instances of the group only produces a warning.

    @raise errors.OpExecError: if the group can't be retrieved

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Check the merged dict, as it is the one stored on the group in Exec();
      # this also matches the handling of the disk parameters below
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        # Templates not mentioned in the opcode are treated as "no changes"
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      # Only the instances whose locks are owned are checked
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
          _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                               self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the group name and the requested allocation policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    Applies the values computed in L{CheckPrereq} to the group object and
    writes it to the configuration.

    @param feedback_fn: passed on to the configuration update
    @rtype: list
    @return: (parameter name, new value as string) pairs; only node and disk
      parameter changes are reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
13852
13853
class LUGroupRemove(LogicalUnit):
  """Logical unit removing an empty node group from the configuration.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and declare the node group lock.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and it must not be the only node
    group known to the configuration.

    """
    # The group has to be empty
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      sorted_names = utils.NiceSort(members)
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(sorted_names)),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    num_groups = len(self.cfg.GetNodeGroupList())
    if num_groups == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the name of the group, as given in the opcode

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of lists, each containing only the master node

    """
    master = self.cfg.GetMasterNode()
    first = [master]
    second = [master]
    return (first, second)

  def Exec(self, feedback_fn):
    """Remove the node group.

    A configuration error during removal is reported as
    L{errors.OpExecError}; afterwards the group's lock is registered for
    removal.

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
13919
13920
class LUGroupRename(LogicalUnit):
  """Logical unit giving an existing node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the current group name and declare the node group lock.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The requested new name must not resolve to any node group.

    """
    wanted_name = self.op.new_name

    try:
      clash_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the name is not in use
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (wanted_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the old and the new group name, as given in the opcode

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice; it starts with the master node, followed
      by the names of the other nodes belonging to the renamed group

    """
    master = self.cfg.GetMasterNode()

    candidates = self.cfg.GetAllNodesInfo()
    # The master is always added explicitly, don't list it a second time
    candidates.pop(master, None)

    hook_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: handed over unchanged to the configuration update
    @return: the new name of the group

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
13988
13989
class LUGroupEvacuate(LogicalUnit):
  """Logical unit evacuating a node group.

  The iallocator is run in change-group mode for the instances locked by
  this LU and the jobs derived from its answer are returned.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group names and prepare the (shared) lock declarations.

    """
    cfg = self.cfg

    # The lookups raise errors.OpPrereqError on their own
    self.group_uuid = cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = [cfg.LookupNodeGroup(name)
                               for name in self.op.target_groups]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(cfg, self.op.iallocator)

    self.share_locks = _ShareAll()

    # The actual lock names are filled in by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Instances are locked optimistically; this has to be verified once
      # the node and group locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set(self.req_target_uuids)
        wanted_groups.add(self.group_uuid)

        # Optimistically lock all groups used by the instances; the groups
        # are found via the nodes before those are locked, which requires
        # verification later on
        for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # In addition, lock all member nodes of every owned group, i.e. the
      # group to be evacuated and the target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups

      extra_nodes = []
      for guuid in owned_groups:
        extra_nodes.extend(self.cfg.GetNodeGroup(guuid).members)

      self.needed_locks[locking.LEVEL_NODE].extend(extra_nodes)

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and choose target groups.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Are the locked instances still the right ones?
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Fetch the configuration of all locked instances
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Are the node groups of the locked instances still the right ones?
    for inst_name in owned_instances:
      instance = self.instances[inst_name]
      assert owned_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % inst_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, inst_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (inst_name, self.group_uuid)

    if self.req_target_uuids:
      # The user asked for specific target groups
      self.target_uuids = self.req_target_uuids
      return

    # Every owned group but the evacuated one is a potential target
    candidates = []
    for guuid in owned_groups:
      if guuid != self.group_uuid:
        candidates.append(guuid)
    self.target_uuids = candidates

    if not candidates:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name from the opcode and the space-separated UUIDs
      of the target groups

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice: the master node followed by the members
      of the group to be evacuated

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    members = self.cfg.GetNodeGroup(self.group_uuid).members
    hook_nodes = [master] + members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Run the iallocator and turn its answer into jobs.

    @return: a L{ResultWithJobs} holding the jobs built from the
      iallocator result

    """
    inst_names = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    allocator_name = self.op.iallocator
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=inst_names, target_groups=self.target_uuids)
    ial.Run(allocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (allocator_name, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
14144
14145
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  Abstract parent class of the tag LUs which operate on a single object; it
  resolves the object name and looks up the object to work on.

  """
  def ExpandNames(self):
    """Expand the object name and declare the lock for nodes and instances.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind

    if kind == constants.TAG_NODE:
      node_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = node_name
      self.needed_locks[locking.LEVEL_NODE] = node_name
    elif kind == constants.TAG_INSTANCE:
      inst_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = inst_name
      self.needed_locks[locking.LEVEL_INSTANCE] = inst_name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object selected by the opcode's kind (and name) in
    C{self.target}; an unknown kind is rejected.

    """
    cfg = self.cfg
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
14182
14183
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as done by the parent class, with shared locks.

    """
    TagsLU.ExpandNames(self)

    # This LU only reads data, hence shared locks are sufficient
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
14201
14202
14203 class LUTagsSearch(NoHooksLU):
14204   """Searches the tags for a given pattern.
14205
14206   """
14207   REQ_BGL = False
14208
14209   def ExpandNames(self):
14210     self.needed_locks = {}
14211
14212   def CheckPrereq(self):
14213     """Check prerequisites.
14214
14215     This checks the pattern passed for validity by compiling it.
14216
14217     """
14218     try:
14219       self.re = re.compile(self.op.pattern)
14220     except re.error, err:
14221       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14222                                  (self.op.pattern, err), errors.ECODE_INVAL)
14223
14224   def Exec(self, feedback_fn):
14225     """Returns the tag list.
14226
14227     """
14228     cfg = self.cfg
14229     tgts = [("/cluster", cfg.GetClusterInfo())]
14230     ilist = cfg.GetAllInstancesInfo().values()
14231     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14232     nlist = cfg.GetAllNodesInfo().values()
14233     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14234     tgts.extend(("/nodegroup/%s" % n.name, n)
14235                 for n in cfg.GetAllNodeGroupsInfo().values())
14236     results = []
14237     for path, target in tgts:
14238       for tag in target.GetTags():
14239         if self.re.search(tag):
14240           results.append((path, tag))
14241     return results
14242
14243
14244 class LUTagsSet(TagsLU):
14245   """Sets a tag on a given object.
14246
14247   """
14248   REQ_BGL = False
14249
14250   def CheckPrereq(self):
14251     """Check prerequisites.
14252
14253     This checks the type and length of the tag name and value.
14254
14255     """
14256     TagsLU.CheckPrereq(self)
14257     for tag in self.op.tags:
14258       objects.TaggableObject.ValidateTag(tag)
14259
14260   def Exec(self, feedback_fn):
14261     """Sets the tag.
14262
14263     """
14264     try:
14265       for tag in self.op.tags:
14266         self.target.AddTag(tag)
14267     except errors.TagError, err:
14268       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14269     self.cfg.Update(self.target, feedback_fn)
14270
14271
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    All tags to be removed must be valid and must be set on the target
    object.

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    wanted = frozenset(self.op.tags)
    missing = wanted - self.target.GetTags()
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    The target object is written to the configuration once all tags have
    been removed.

    """
    target = self.target

    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
14304
14305
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The delay is executed on the master and/or on a list of nodes, as
  requested by the opcode.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, if one was given, is expanded and locked.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    node_names = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = node_names
    self.needed_locks[locking.LEVEL_NODE] = node_names

  def _TestDelay(self):
    """Do the actual sleep.

    The master delay, if requested, is done first; afterwards the nodes,
    if any, are asked to sleep via RPC.

    """
    duration = self.op.duration

    if self.op.on_master:
      if not utils.TestDelay(duration):
        raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node, node_result) in results.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero executes a single delay without any iteration
    message.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (iteration, last))
      self._TestDelay()
14352
14353
14354 class LUTestJqueue(NoHooksLU):
14355   """Utility LU to test some aspects of the job queue.
14356
14357   """
14358   REQ_BGL = False
14359
14360   # Must be lower than default timeout for WaitForJobChange to see whether it
14361   # notices changed jobs
14362   _CLIENT_CONNECT_TIMEOUT = 20.0
14363   _CLIENT_CONFIRM_TIMEOUT = 60.0
14364
14365   @classmethod
14366   def _NotifyUsingSocket(cls, cb, errcls):
14367     """Opens a Unix socket and waits for another program to connect.
14368
14369     @type cb: callable
14370     @param cb: Callback to send socket name to client
14371     @type errcls: class
14372     @param errcls: Exception class to use for errors
14373
14374     """
14375     # Using a temporary directory as there's no easy way to create temporary
14376     # sockets without writing a custom loop around tempfile.mktemp and
14377     # socket.bind
14378     tmpdir = tempfile.mkdtemp()
14379     try:
14380       tmpsock = utils.PathJoin(tmpdir, "sock")
14381
14382       logging.debug("Creating temporary socket at %s", tmpsock)
14383       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14384       try:
14385         sock.bind(tmpsock)
14386         sock.listen(1)
14387
14388         # Send details to client
14389         cb(tmpsock)
14390
14391         # Wait for client to connect before continuing
14392         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14393         try:
14394           (conn, _) = sock.accept()
14395         except socket.error, err:
14396           raise errcls("Client didn't connect in time (%s)" % err)
14397       finally:
14398         sock.close()
14399     finally:
14400       # Remove as soon as client is connected
14401       shutil.rmtree(tmpdir)
14402
14403     # Wait for client to close
14404     try:
14405       try:
14406         # pylint: disable=E1101
14407         # Instance of '_socketobject' has no ... member
14408         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14409         conn.recv(1)
14410       except socket.error, err:
14411         raise errcls("Client failed to confirm notification (%s)" % err)
14412     finally:
14413       conn.close()
14414
14415   def _SendNotification(self, test, arg, sockname):
14416     """Sends a notification to the client.
14417
14418     @type test: string
14419     @param test: Test name
14420     @param arg: Test argument (depends on test)
14421     @type sockname: string
14422     @param sockname: Socket path
14423
14424     """
14425     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14426
14427   def _Notify(self, prereq, test, arg):
14428     """Notifies the client of a test.
14429
14430     @type prereq: bool
14431     @param prereq: Whether this is a prereq-phase test
14432     @type test: string
14433     @param test: Test name
14434     @param arg: Test argument (depends on test)
14435
14436     """
14437     if prereq:
14438       errcls = errors.OpPrereqError
14439     else:
14440       errcls = errors.OpExecError
14441
14442     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14443                                                   test, arg),
14444                                    errcls)
14445
14446   def CheckArguments(self):
14447     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14448     self.expandnames_calls = 0
14449
14450   def ExpandNames(self):
14451     checkargs_calls = getattr(self, "checkargs_calls", 0)
14452     if checkargs_calls < 1:
14453       raise errors.ProgrammerError("CheckArguments was not called")
14454
14455     self.expandnames_calls += 1
14456
14457     if self.op.notify_waitlock:
14458       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14459
14460     self.LogInfo("Expanding names")
14461
14462     # Get lock on master node (just to get a lock, not for a particular reason)
14463     self.needed_locks = {
14464       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14465       }
14466
14467   def Exec(self, feedback_fn):
14468     if self.expandnames_calls < 1:
14469       raise errors.ProgrammerError("ExpandNames was not called")
14470
14471     if self.op.notify_exec:
14472       self._Notify(False, constants.JQT_EXEC, None)
14473
14474     self.LogInfo("Executing")
14475
14476     if self.op.log_messages:
14477       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14478       for idx, msg in enumerate(self.op.log_messages):
14479         self.LogInfo("Sending log message %s", idx + 1)
14480         feedback_fn(constants.JQT_MSGPREFIX + msg)
14481         # Report how many test messages have been sent
14482         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14483
14484     if self.op.fail:
14485       raise errors.OpExecError("Opcode failure was requested")
14486
14487     return True
14488
14489
14490 class IAllocator(object):
14491   """IAllocator framework.
14492
14493   An IAllocator instance has three sets of attributes:
14494     - cfg that is needed to query the cluster
14495     - input data (all members of the _KEYS class attribute are required)
14496     - four buffer attributes (in|out_data|text), that represent the
14497       input (to the external script) in text and data structure format,
14498       and the output from it, again in two formats
14499     - the result variables from the script (success, info, nodes) for
14500       easy usage
14501
14502   """
14503   # pylint: disable=R0902
14504   # lots of instance attributes
14505
14506   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14507     self.cfg = cfg
14508     self.rpc = rpc_runner
14509     # init buffer variables
14510     self.in_text = self.out_text = self.in_data = self.out_data = None
14511     # init all input fields so that pylint is happy
14512     self.mode = mode
14513     self.memory = self.disks = self.disk_template = None
14514     self.os = self.tags = self.nics = self.vcpus = None
14515     self.hypervisor = None
14516     self.relocate_from = None
14517     self.name = None
14518     self.instances = None
14519     self.evac_mode = None
14520     self.target_groups = []
14521     # computed fields
14522     self.required_nodes = None
14523     # init result fields
14524     self.success = self.info = self.result = None
14525
14526     try:
14527       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14528     except KeyError:
14529       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14530                                    " IAllocator" % self.mode)
14531
14532     keyset = [n for (n, _) in keydata]
14533
14534     for key in kwargs:
14535       if key not in keyset:
14536         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14537                                      " IAllocator" % key)
14538       setattr(self, key, kwargs[key])
14539
14540     for key in keyset:
14541       if key not in kwargs:
14542         raise errors.ProgrammerError("Missing input parameter '%s' to"
14543                                      " IAllocator" % key)
14544     self._BuildInputData(compat.partial(fn, self), keydata)
14545
14546   def _ComputeClusterData(self):
14547     """Compute the generic allocator input data.
14548
14549     This is the data that is independent of the actual operation.
14550
14551     """
14552     cfg = self.cfg
14553     cluster_info = cfg.GetClusterInfo()
14554     # cluster data
14555     data = {
14556       "version": constants.IALLOCATOR_VERSION,
14557       "cluster_name": cfg.GetClusterName(),
14558       "cluster_tags": list(cluster_info.GetTags()),
14559       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14560       "ipolicy": cluster_info.ipolicy,
14561       }
14562     ninfo = cfg.GetAllNodesInfo()
14563     iinfo = cfg.GetAllInstancesInfo().values()
14564     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14565
14566     # node data
14567     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14568
14569     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14570       hypervisor_name = self.hypervisor
14571     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14572       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14573     else:
14574       hypervisor_name = cluster_info.primary_hypervisor
14575
14576     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14577                                         [hypervisor_name])
14578     node_iinfo = \
14579       self.rpc.call_all_instances_info(node_list,
14580                                        cluster_info.enabled_hypervisors)
14581
14582     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14583
14584     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14585     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14586                                                  i_list, config_ndata)
14587     assert len(data["nodes"]) == len(ninfo), \
14588         "Incomplete node data computed"
14589
14590     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14591
14592     self.in_data = data
14593
14594   @staticmethod
14595   def _ComputeNodeGroupData(cfg):
14596     """Compute node groups data.
14597
14598     """
14599     cluster = cfg.GetClusterInfo()
14600     ng = dict((guuid, {
14601       "name": gdata.name,
14602       "alloc_policy": gdata.alloc_policy,
14603       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14604       })
14605       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14606
14607     return ng
14608
14609   @staticmethod
14610   def _ComputeBasicNodeData(cfg, node_cfg):
14611     """Compute global node data.
14612
14613     @rtype: dict
14614     @returns: a dict of name: (node dict, node config)
14615
14616     """
14617     # fill in static (config-based) values
14618     node_results = dict((ninfo.name, {
14619       "tags": list(ninfo.GetTags()),
14620       "primary_ip": ninfo.primary_ip,
14621       "secondary_ip": ninfo.secondary_ip,
14622       "offline": ninfo.offline,
14623       "drained": ninfo.drained,
14624       "master_candidate": ninfo.master_candidate,
14625       "group": ninfo.group,
14626       "master_capable": ninfo.master_capable,
14627       "vm_capable": ninfo.vm_capable,
14628       "ndparams": cfg.GetNdParams(ninfo),
14629       })
14630       for ninfo in node_cfg.values())
14631
14632     return node_results
14633
14634   @staticmethod
14635   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14636                               node_results):
14637     """Compute global node data.
14638
14639     @param node_results: the basic node structures as filled from the config
14640
14641     """
14642     #TODO(dynmem): compute the right data on MAX and MIN memory
14643     # make a copy of the current dict
14644     node_results = dict(node_results)
14645     for nname, nresult in node_data.items():
14646       assert nname in node_results, "Missing basic data for node %s" % nname
14647       ninfo = node_cfg[nname]
14648
14649       if not (ninfo.offline or ninfo.drained):
14650         nresult.Raise("Can't get data for node %s" % nname)
14651         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14652                                 nname)
14653         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14654
14655         for attr in ["memory_total", "memory_free", "memory_dom0",
14656                      "vg_size", "vg_free", "cpu_total"]:
14657           if attr not in remote_info:
14658             raise errors.OpExecError("Node '%s' didn't return attribute"
14659                                      " '%s'" % (nname, attr))
14660           if not isinstance(remote_info[attr], int):
14661             raise errors.OpExecError("Node '%s' returned invalid value"
14662                                      " for '%s': %s" %
14663                                      (nname, attr, remote_info[attr]))
14664         # compute memory used by primary instances
14665         i_p_mem = i_p_up_mem = 0
14666         for iinfo, beinfo in i_list:
14667           if iinfo.primary_node == nname:
14668             i_p_mem += beinfo[constants.BE_MAXMEM]
14669             if iinfo.name not in node_iinfo[nname].payload:
14670               i_used_mem = 0
14671             else:
14672               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14673             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14674             remote_info["memory_free"] -= max(0, i_mem_diff)
14675
14676             if iinfo.admin_state == constants.ADMINST_UP:
14677               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14678
14679         # compute memory used by instances
14680         pnr_dyn = {
14681           "total_memory": remote_info["memory_total"],
14682           "reserved_memory": remote_info["memory_dom0"],
14683           "free_memory": remote_info["memory_free"],
14684           "total_disk": remote_info["vg_size"],
14685           "free_disk": remote_info["vg_free"],
14686           "total_cpus": remote_info["cpu_total"],
14687           "i_pri_memory": i_p_mem,
14688           "i_pri_up_memory": i_p_up_mem,
14689           }
14690         pnr_dyn.update(node_results[nname])
14691         node_results[nname] = pnr_dyn
14692
14693     return node_results
14694
14695   @staticmethod
14696   def _ComputeInstanceData(cluster_info, i_list):
14697     """Compute global instance data.
14698
14699     """
14700     instance_data = {}
14701     for iinfo, beinfo in i_list:
14702       nic_data = []
14703       for nic in iinfo.nics:
14704         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14705         nic_dict = {
14706           "mac": nic.mac,
14707           "ip": nic.ip,
14708           "mode": filled_params[constants.NIC_MODE],
14709           "link": filled_params[constants.NIC_LINK],
14710           }
14711         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14712           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14713         nic_data.append(nic_dict)
14714       pir = {
14715         "tags": list(iinfo.GetTags()),
14716         "admin_state": iinfo.admin_state,
14717         "vcpus": beinfo[constants.BE_VCPUS],
14718         "memory": beinfo[constants.BE_MAXMEM],
14719         "os": iinfo.os,
14720         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14721         "nics": nic_data,
14722         "disks": [{constants.IDISK_SIZE: dsk.size,
14723                    constants.IDISK_MODE: dsk.mode}
14724                   for dsk in iinfo.disks],
14725         "disk_template": iinfo.disk_template,
14726         "hypervisor": iinfo.hypervisor,
14727         }
14728       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14729                                                  pir["disks"])
14730       instance_data[iinfo.name] = pir
14731
14732     return instance_data
14733
14734   def _AddNewInstance(self):
14735     """Add new instance data to allocator structure.
14736
14737     This in combination with _AllocatorGetClusterData will create the
14738     correct structure needed as input for the allocator.
14739
14740     The checks for the completeness of the opcode must have already been
14741     done.
14742
14743     """
14744     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14745
14746     if self.disk_template in constants.DTS_INT_MIRROR:
14747       self.required_nodes = 2
14748     else:
14749       self.required_nodes = 1
14750
14751     request = {
14752       "name": self.name,
14753       "disk_template": self.disk_template,
14754       "tags": self.tags,
14755       "os": self.os,
14756       "vcpus": self.vcpus,
14757       "memory": self.memory,
14758       "disks": self.disks,
14759       "disk_space_total": disk_space,
14760       "nics": self.nics,
14761       "required_nodes": self.required_nodes,
14762       "hypervisor": self.hypervisor,
14763       }
14764
14765     return request
14766
14767   def _AddRelocateInstance(self):
14768     """Add relocate instance data to allocator structure.
14769
14770     This in combination with _IAllocatorGetClusterData will create the
14771     correct structure needed as input for the allocator.
14772
14773     The checks for the completeness of the opcode must have already been
14774     done.
14775
14776     """
14777     instance = self.cfg.GetInstanceInfo(self.name)
14778     if instance is None:
14779       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14780                                    " IAllocator" % self.name)
14781
14782     if instance.disk_template not in constants.DTS_MIRRORED:
14783       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14784                                  errors.ECODE_INVAL)
14785
14786     if instance.disk_template in constants.DTS_INT_MIRROR and \
14787         len(instance.secondary_nodes) != 1:
14788       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14789                                  errors.ECODE_STATE)
14790
14791     self.required_nodes = 1
14792     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14793     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14794
14795     request = {
14796       "name": self.name,
14797       "disk_space_total": disk_space,
14798       "required_nodes": self.required_nodes,
14799       "relocate_from": self.relocate_from,
14800       }
14801     return request
14802
14803   def _AddNodeEvacuate(self):
14804     """Get data for node-evacuate requests.
14805
14806     """
14807     return {
14808       "instances": self.instances,
14809       "evac_mode": self.evac_mode,
14810       }
14811
14812   def _AddChangeGroup(self):
14813     """Get data for node-evacuate requests.
14814
14815     """
14816     return {
14817       "instances": self.instances,
14818       "target_groups": self.target_groups,
14819       }
14820
14821   def _BuildInputData(self, fn, keydata):
14822     """Build input data structures.
14823
14824     """
14825     self._ComputeClusterData()
14826
14827     request = fn()
14828     request["type"] = self.mode
14829     for keyname, keytype in keydata:
14830       if keyname not in request:
14831         raise errors.ProgrammerError("Request parameter %s is missing" %
14832                                      keyname)
14833       val = request[keyname]
14834       if not keytype(val):
14835         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14836                                      " validation, value %s, expected"
14837                                      " type %s" % (keyname, val, keytype))
14838     self.in_data["request"] = request
14839
14840     self.in_text = serializer.Dump(self.in_data)
14841
14842   _STRING_LIST = ht.TListOf(ht.TString)
14843   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14844      # pylint: disable=E1101
14845      # Class '...' has no 'OP_ID' member
14846      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14847                           opcodes.OpInstanceMigrate.OP_ID,
14848                           opcodes.OpInstanceReplaceDisks.OP_ID])
14849      })))
14850
14851   _NEVAC_MOVED = \
14852     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14853                        ht.TItems([ht.TNonEmptyString,
14854                                   ht.TNonEmptyString,
14855                                   ht.TListOf(ht.TNonEmptyString),
14856                                  ])))
14857   _NEVAC_FAILED = \
14858     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14859                        ht.TItems([ht.TNonEmptyString,
14860                                   ht.TMaybeString,
14861                                  ])))
14862   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14863                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14864
14865   _MODE_DATA = {
14866     constants.IALLOCATOR_MODE_ALLOC:
14867       (_AddNewInstance,
14868        [
14869         ("name", ht.TString),
14870         ("memory", ht.TInt),
14871         ("disks", ht.TListOf(ht.TDict)),
14872         ("disk_template", ht.TString),
14873         ("os", ht.TString),
14874         ("tags", _STRING_LIST),
14875         ("nics", ht.TListOf(ht.TDict)),
14876         ("vcpus", ht.TInt),
14877         ("hypervisor", ht.TString),
14878         ], ht.TList),
14879     constants.IALLOCATOR_MODE_RELOC:
14880       (_AddRelocateInstance,
14881        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14882        ht.TList),
14883      constants.IALLOCATOR_MODE_NODE_EVAC:
14884       (_AddNodeEvacuate, [
14885         ("instances", _STRING_LIST),
14886         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14887         ], _NEVAC_RESULT),
14888      constants.IALLOCATOR_MODE_CHG_GROUP:
14889       (_AddChangeGroup, [
14890         ("instances", _STRING_LIST),
14891         ("target_groups", _STRING_LIST),
14892         ], _NEVAC_RESULT),
14893     }
14894
14895   def Run(self, name, validate=True, call_fn=None):
14896     """Run an instance allocator and return the results.
14897
14898     """
14899     if call_fn is None:
14900       call_fn = self.rpc.call_iallocator_runner
14901
14902     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14903     result.Raise("Failure while running the iallocator script")
14904
14905     self.out_text = result.payload
14906     if validate:
14907       self._ValidateResult()
14908
14909   def _ValidateResult(self):
14910     """Process the allocator results.
14911
14912     This will process and if successful save the result in
14913     self.out_data and the other parameters.
14914
14915     """
14916     try:
14917       rdict = serializer.Load(self.out_text)
14918     except Exception, err:
14919       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14920
14921     if not isinstance(rdict, dict):
14922       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14923
14924     # TODO: remove backwards compatiblity in later versions
14925     if "nodes" in rdict and "result" not in rdict:
14926       rdict["result"] = rdict["nodes"]
14927       del rdict["nodes"]
14928
14929     for key in "success", "info", "result":
14930       if key not in rdict:
14931         raise errors.OpExecError("Can't parse iallocator results:"
14932                                  " missing key '%s'" % key)
14933       setattr(self, key, rdict[key])
14934
14935     if not self._result_check(self.result):
14936       raise errors.OpExecError("Iallocator returned invalid result,"
14937                                " expected %s, got %s" %
14938                                (self._result_check, self.result),
14939                                errors.ECODE_INVAL)
14940
14941     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14942       assert self.relocate_from is not None
14943       assert self.required_nodes == 1
14944
14945       node2group = dict((name, ndata["group"])
14946                         for (name, ndata) in self.in_data["nodes"].items())
14947
14948       fn = compat.partial(self._NodesToGroups, node2group,
14949                           self.in_data["nodegroups"])
14950
14951       instance = self.cfg.GetInstanceInfo(self.name)
14952       request_groups = fn(self.relocate_from + [instance.primary_node])
14953       result_groups = fn(rdict["result"] + [instance.primary_node])
14954
14955       if self.success and not set(result_groups).issubset(request_groups):
14956         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14957                                  " differ from original groups (%s)" %
14958                                  (utils.CommaJoin(result_groups),
14959                                   utils.CommaJoin(request_groups)))
14960
14961     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14962       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14963
14964     self.out_data = rdict
14965
14966   @staticmethod
14967   def _NodesToGroups(node2group, groups, nodes):
14968     """Returns a list of unique group names for a list of nodes.
14969
14970     @type node2group: dict
14971     @param node2group: Map from node name to group UUID
14972     @type groups: dict
14973     @param groups: Group information
14974     @type nodes: list
14975     @param nodes: Node names
14976
14977     """
14978     result = set()
14979
14980     for node in nodes:
14981       try:
14982         group_uuid = node2group[node]
14983       except KeyError:
14984         # Ignore unknown node
14985         pass
14986       else:
14987         try:
14988           group = groups[group_uuid]
14989         except KeyError:
14990           # Can't find group, let's use UUID
14991           group_name = group_uuid
14992         else:
14993           group_name = group["name"]
14994
14995         result.add(group_name)
14996
14997     return sorted(result)
14998
14999
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter required by the selected
        mode is missing or invalid, if the mode or direction is unknown,
        or if no allocator name is given for the "out" direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the allocation test needs a name not used by any instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk needs an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: the allocator input text for the "in" direction, otherwise
        the unvalidated output text of the allocator
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # the mode has to be interpolated with "%"; passing it as a second
      # argument would leave the "%s" placeholder in the message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is returned, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15103
15104
#: Query type implementations, indexed by query resource
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must provide exactly the resources listed in QR_VIA_OP
assert set(_QUERY_IMPL) == constants.QR_VIA_OP
15114
15115
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for the query type
  @raise errors.OpPrereqError: if the query type is not known

  """
  try:
    impl = _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return impl