code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @ivar needed_locks: locking needs of the LU, C{None} until set by
      L{ExpandNames}
  @ivar share_locks: dictionary mapping each lock level to a flag telling
      whether locks at that level are shared; initialised to 0 (not shared)
      for all levels
  @ivar tasklets: C{None} or the list of tasklets run by the default
      L{CheckPrereq} and L{Exec} implementations

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    The constructor stores its arguments, initialises the locking
    declarations, validates the opcode via C{op.Validate(True)} and finally
    calls L{CheckArguments}.

    @param processor: the object executing this LU; its C{Log},
        C{LogWarning}, C{LogInfo} and C{LogStep} methods are made available
        as attributes of the LU
    @param op: the opcode this LU is run for; if it has no integer
        C{debug_level} attribute, one is set with value 0
    @param context: object providing the C{cfg} and C{glm} attributes
    @param rpc_runner: object stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Must stay last: subclasses' checks see a fully initialised LU
    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    The default implementation does nothing.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL
        (C{REQ_BGL} is false) and has not overridden this method

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation calls C{CheckPrereq} on every tasklet, in
    order, and does nothing if no tasklets were defined.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation calls C{Exec} on every tasklet, in order,
    passing C{feedback_fn} to each of them.

    @param feedback_fn: function passed on unchanged to the tasklets
    @raise NotImplementedError: if no tasklets were defined and the method
        was not overridden

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    If needed_locks was already initialised, it must not contain an entry
    for the instance level yet (this is checked with an assertion).

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[level] with the
    primary nodes (and, unless C{primary_only} is set, also the secondary
    nodes) of all instances whose locks are currently owned by this LU.

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[level] is set. Depending on that value the
    computed nodes either replace the already declared locks
    (L{constants.LOCKS_REPLACE}) or are appended to them
    (L{constants.LOCKS_APPEND}); the entry in self.recalculate_locks is
    removed afterwards.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes
    @raise errors.ProgrammerError: if self.recalculate_locks[level] holds
        an unknown recalculation mode

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # Collect the nodes of all instances whose locks we own; what is done
    # with them afterwards depends on the value of
    # self.recalculate_locks[level]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    # The recalculation request has been served, don't run it again
    del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield="name")
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _MakeLegacyNodeInfo(data):
 586   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 587
 588   Converts the data into a single dictionary. This is fine for most use cases,
 589   but some require information from more than one volume group or hypervisor.
 590
 591   """
 592   (bootid, (vg_info, ), (hv_info, )) = data
 593
 594   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 595     "bootid": bootid,
 596     })
 597
 598
 599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 600   """Checks if the owned node groups are still correct for an instance.
 601
 602   @type cfg: L{config.ConfigWriter}
 603   @param cfg: The cluster configuration
 604   @type instance_name: string
 605   @param instance_name: Instance name
 606   @type owned_groups: set or frozenset
 607   @param owned_groups: List of currently owned node groups
 608
 609   """
 610   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 611
 612   if not owned_groups.issuperset(inst_groups):
 613     raise errors.OpPrereqError("Instance %s's node groups changed since"
 614                                " locks were acquired, current groups are"
 615                                " are '%s', owning groups '%s'; retry the"
 616                                " operation" %
 617                                (instance_name,
 618                                 utils.CommaJoin(inst_groups),
 619                                 utils.CommaJoin(owned_groups)),
 620                                errors.ECODE_STATE)
 621
 622   return inst_groups
 623
 624
 625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 626   """Checks if the instances in a node group are still correct.
 627
 628   @type cfg: L{config.ConfigWriter}
 629   @param cfg: The cluster configuration
 630   @type group_uuid: string
 631   @param group_uuid: Node group UUID
 632   @type owned_instances: set or frozenset
 633   @param owned_instances: List of currently owned instances
 634
 635   """
 636   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 637   if owned_instances != wanted_instances:
 638     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 639                                " locks were acquired, wanted '%s', have '%s';"
 640                                " retry the operation" %
 641                                (group_uuid,
 642                                 utils.CommaJoin(wanted_instances),
 643                                 utils.CommaJoin(owned_instances)),
 644                                errors.ECODE_STATE)
 645
 646   return wanted_instances
 647
 648
 649 def _SupportsOob(cfg, node):
 650   """Tells if node supports OOB.
 651
 652   @type cfg: L{config.ConfigWriter}
 653   @param cfg: The cluster configuration
 654   @type node: L{objects.Node}
 655   @param node: The node
 656   @return: The OOB script if supported or an empty string otherwise
 657
 658   """
 659   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 660
 661
 662 def _GetWantedNodes(lu, nodes):
 663   """Returns list of checked and expanded node names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type nodes: list
 668   @param nodes: list of node names or None for all nodes
 669   @rtype: list
 670   @return: the list of nodes, sorted
 671   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 672
 673   """
 674   if nodes:
 675     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 676
 677   return utils.NiceSort(lu.cfg.GetNodeList())
 678
 679
 680 def _GetWantedInstances(lu, instances):
 681   """Returns list of checked and expanded instance names.
 682
 683   @type lu: L{LogicalUnit}
 684   @param lu: the logical unit on whose behalf we execute
 685   @type instances: list
 686   @param instances: list of instance names or None for all instances
 687   @rtype: list
 688   @return: the list of instances, sorted
 689   @raise errors.OpPrereqError: if the instances parameter is wrong type
 690   @raise errors.OpPrereqError: if any of the passed instances is not found
 691
 692   """
 693   if instances:
 694     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 695   else:
 696     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 697   return wanted
 698
 699
 700 def _GetUpdatedParams(old_params, update_dict,
 701                       use_default=True, use_none=False):
 702   """Return the new version of a parameter dictionary.
 703
 704   @type old_params: dict
 705   @param old_params: old parameters
 706   @type update_dict: dict
 707   @param update_dict: dict containing new parameter values, or
 708       constants.VALUE_DEFAULT to reset the parameter to its default
 709       value
 710   @param use_default: boolean
 711   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 712       values as 'to be deleted' values
 713   @param use_none: boolean
 714   @type use_none: whether to recognise C{None} values as 'to be
 715       deleted' values
 716   @rtype: dict
 717   @return: the new parameter dictionary
 718
 719   """
 720   params_copy = copy.deepcopy(old_params)
 721   for key, val in update_dict.iteritems():
 722     if ((use_default and val == constants.VALUE_DEFAULT) or
 723         (use_none and val is None)):
 724       try:
 725         del params_copy[key]
 726       except KeyError:
 727         pass
 728     else:
 729       params_copy[key] = val
 730   return params_copy
 731
 732
 733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 734   """Return the new version of a instance policy.
 735
 736   @param group_policy: whether this policy applies to a group and thus
 737     we should support removal of policy entries
 738
 739   """
 740   use_none = use_default = group_policy
 741   ipolicy = copy.deepcopy(old_ipolicy)
 742   for key, value in new_ipolicy.items():
 743     if key not in constants.IPOLICY_ALL_KEYS:
 744       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 745                                  errors.ECODE_INVAL)
 746     if key in constants.IPOLICY_ISPECS:
 747       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 748       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 749                                        use_none=use_none,
 750                                        use_default=use_default)
 751     else:
 752       if not value or value == [constants.VALUE_DEFAULT]:
 753         if group_policy:
 754           del ipolicy[key]
 755         else:
 756           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 757                                      " on the cluster'" % key,
 758                                      errors.ECODE_INVAL)
 759       else:
 760         if key in constants.IPOLICY_PARAMETERS:
 761           # FIXME: we assume all such values are float
 762           try:
 763             ipolicy[key] = float(value)
 764           except (TypeError, ValueError), err:
 765             raise errors.OpPrereqError("Invalid value for attribute"
 766                                        " '%s': '%s', error: %s" %
 767                                        (key, value, err), errors.ECODE_INVAL)
 768         else:
 769           # FIXME: we assume all others are lists; this should be redone
 770           # in a nicer way
 771           ipolicy[key] = list(value)
 772   try:
 773     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 774   except errors.ConfigurationError, err:
 775     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 776                                errors.ECODE_INVAL)
 777   return ipolicy
 778
 779
 780 def _UpdateAndVerifySubDict(base, updates, type_check):
 781   """Updates and verifies a dict with sub dicts of the same type.
 782
 783   @param base: The dict with the old data
 784   @param updates: The dict with the new data
 785   @param type_check: Dict suitable to ForceDictType to verify correct types
 786   @returns: A new dict with updated and verified values
 787
 788   """
 789   def fn(old, value):
 790     new = _GetUpdatedParams(old, value)
 791     utils.ForceDictType(new, type_check)
 792     return new
 793
 794   ret = copy.deepcopy(base)
 795   ret.update(dict((key, fn(base.get(key, {}), value))
 796                   for key, value in updates.items()))
 797   return ret
 798
 799
 800 def _MergeAndVerifyHvState(op_input, obj_input):
 801   """Combines the hv state from an opcode with the one of the object
 802
 803   @param op_input: The input dict from the opcode
 804   @param obj_input: The input dict from the objects
 805   @return: The verified and updated dict
 806
 807   """
 808   if op_input:
 809     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 810     if invalid_hvs:
 811       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 812                                  " %s" % utils.CommaJoin(invalid_hvs),
 813                                  errors.ECODE_INVAL)
 814     if obj_input is None:
 815       obj_input = {}
 816     type_check = constants.HVSTS_PARAMETER_TYPES
 817     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 818
 819   return None
 820
 821
 822 def _MergeAndVerifyDiskState(op_input, obj_input):
 823   """Combines the disk state from an opcode with the one of the object
 824
 825   @param op_input: The input dict from the opcode
 826   @param obj_input: The input dict from the objects
 827   @return: The verified and updated dict
 828   """
 829   if op_input:
 830     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 831     if invalid_dst:
 832       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 833                                  utils.CommaJoin(invalid_dst),
 834                                  errors.ECODE_INVAL)
 835     type_check = constants.DSS_PARAMETER_TYPES
 836     if obj_input is None:
 837       obj_input = {}
 838     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 839                                               type_check))
 840                 for key, value in op_input.items())
 841
 842   return None
 843
 844
 845 def _ReleaseLocks(lu, level, names=None, keep=None):
 846   """Releases locks owned by an LU.
 847
 848   @type lu: L{LogicalUnit}
 849   @param level: Lock level
 850   @type names: list or None
 851   @param names: Names of locks to release
 852   @type keep: list or None
 853   @param keep: Names of locks to retain
 854
 855   """
 856   assert not (keep is not None and names is not None), \
 857          "Only one of the 'names' and the 'keep' parameters can be given"
 858
 859   if names is not None:
 860     should_release = names.__contains__
 861   elif keep:
 862     should_release = lambda name: name not in keep
 863   else:
 864     should_release = None
 865
 866   owned = lu.owned_locks(level)
 867   if not owned:
 868     # Not owning any lock at this level, do nothing
 869     pass
 870
 871   elif should_release:
 872     retain = []
 873     release = []
 874
 875     # Determine which locks to release
 876     for name in owned:
 877       if should_release(name):
 878         release.append(name)
 879       else:
 880         retain.append(name)
 881
 882     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 883
 884     # Release just some locks
 885     lu.glm.release(level, names=release)
 886
 887     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 888   else:
 889     # Release everything
 890     lu.glm.release(level)
 891
 892     assert not lu.glm.is_owned(level), "No locks should be owned"
 893
 894
 895 def _MapInstanceDisksToNodes(instances):
 896   """Creates a map from (node, volume) to instance name.
 897
 898   @type instances: list of L{objects.Instance}
 899   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 900
 901   """
 902   return dict(((node, vol), inst.name)
 903               for inst in instances
 904               for (node, vols) in inst.MapLVsByNode().items()
 905               for vol in vols)
 906
 907
 908 def _RunPostHook(lu, node_name):
 909   """Runs the post-hook for an opcode on a single node.
 910
 911   """
 912   hm = lu.proc.BuildHooksManager(lu)
 913   try:
 914     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 915   except:
 916     # pylint: disable=W0702
 917     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 918
 919
 920 def _CheckOutputFields(static, dynamic, selected):
 921   """Checks whether all selected fields are valid.
 922
 923   @type static: L{utils.FieldSet}
 924   @param static: static fields set
 925   @type dynamic: L{utils.FieldSet}
 926   @param dynamic: dynamic fields set
 927
 928   """
 929   f = utils.FieldSet()
 930   f.Extend(static)
 931   f.Extend(dynamic)
 932
 933   delta = f.NonMatching(selected)
 934   if delta:
 935     raise errors.OpPrereqError("Unknown output fields selected: %s"
 936                                % ",".join(delta), errors.ECODE_INVAL)
 937
 938
 939 def _CheckGlobalHvParams(params):
 940   """Validates that given hypervisor params are not global ones.
 941
 942   This will ensure that instances don't get customised versions of
 943   global params.
 944
 945   """
 946   used_globals = constants.HVC_GLOBALS.intersection(params)
 947   if used_globals:
 948     msg = ("The following hypervisor parameters are global and cannot"
 949            " be customized at instance level, please modify them at"
 950            " cluster level: %s" % utils.CommaJoin(used_globals))
 951     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 952
 953
 954 def _CheckNodeOnline(lu, node, msg=None):
 955   """Ensure that a given node is online.
 956
 957   @param lu: the LU on behalf of which we make the check
 958   @param node: the node to check
 959   @param msg: if passed, should be a message to replace the default one
 960   @raise errors.OpPrereqError: if the node is offline
 961
 962   """
 963   if msg is None:
 964     msg = "Can't use offline node"
 965   if lu.cfg.GetNodeInfo(node).offline:
 966     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 967
 968
 969 def _CheckNodeNotDrained(lu, node):
 970   """Ensure that a given node is not drained.
 971
 972   @param lu: the LU on behalf of which we make the check
 973   @param node: the node to check
 974   @raise errors.OpPrereqError: if the node is drained
 975
 976   """
 977   if lu.cfg.GetNodeInfo(node).drained:
 978     raise errors.OpPrereqError("Can't use drained node %s" % node,
 979                                errors.ECODE_STATE)
 980
 981
 982 def _CheckNodeVmCapable(lu, node):
 983   """Ensure that a given node is vm capable.
 984
 985   @param lu: the LU on behalf of which we make the check
 986   @param node: the node to check
 987   @raise errors.OpPrereqError: if the node is not vm capable
 988
 989   """
 990   if not lu.cfg.GetNodeInfo(node).vm_capable:
 991     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 992                                errors.ECODE_STATE)
 993
 994
 995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 996   """Ensure that a node supports a given OS.
 997
 998   @param lu: the LU on behalf of which we make the check
 999   @param node: the node to check
1000   @param os_name: the OS to query about
1001   @param force_variant: whether to ignore variant errors
1002   @raise errors.OpPrereqError: if the node is not supporting the OS
1003
1004   """
1005   result = lu.rpc.call_os_get(node, os_name)
1006   result.Raise("OS '%s' not in supported OS list for node %s" %
1007                (os_name, node),
1008                prereq=True, ecode=errors.ECODE_INVAL)
1009   if not force_variant:
1010     _CheckOSVariant(result.payload, os_name)
1011
1012
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Verifies that a node owns the given secondary ip.

  The node itself is asked (via RPC) whether it has the address.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  has_ip = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  has_ip.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  if has_ip.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1038
1039
def _GetClusterDomainSecret():
  """Returns the cluster domain secret.

  The secret is read (in strict mode) as a single line from
  L{constants.CLUSTER_DOMAIN_SECRET_FILE}.

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1046
1047
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Verifies that an instance is in one of the required states.

  Besides the configured admin state, if "up" is not among the required
  states the primary node is also asked whether the instance is actually
  running.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  admin_state = instance.admin_state
  if admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  all_results = lu.rpc.call_instance_list([pnode], [instance.hypervisor])
  running = all_results[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1073
1074
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Checks whether a value lies within the range allowed by the ipolicy.

  Values of C{None} or L{constants.VALUE_AUTO} are never considered a
  violation. A bound which is not defined for the parameter defaults to
  the value itself, i.e. it's not enforced.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  if not (value > upper or lower > value):
    return None

  return ("%s value %s is not in range [%s, %s]" %
          (name, value, lower, upper))
1093
1094
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Checks the provided specs against an ipolicy.

  Every spec (and every single disk size) is passed through the compute
  function; all non-empty results are collected.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    (constants.ISPEC_SPINDLE_USE, spindle_use),
    ]
  # Each disk is verified on its own against the disk size limits
  checks.extend([(constants.ISPEC_DISK_SIZE, size) for size in disk_sizes])

  violations = []
  for (spec_name, spec_value) in checks:
    outcome = _compute_fn(spec_name, ipolicy, spec_value)
    if outcome:
      violations.append(outcome)

  return violations
1131
1132
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Checks whether an instance object meets the specs of an ipolicy.

  Memory, CPU and spindle values are taken from the instance's own backend
  parameters (C{None} if not set there); disk and NIC data comes from the
  instance's disk and NIC lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks],
                     beparams.get(constants.BE_SPINDLE_USAGE, None))
1154
1155
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Checks whether an instance specification meets the specs of an ipolicy.

  Specs missing from the dictionary default to C{None} (memory, CPU and
  spindle use), C{0} (disk and NIC count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  return _compute_fn(ipolicy,
                     spec(constants.ISPEC_MEM_SIZE, None),
                     spec(constants.ISPEC_CPU_COUNT, None),
                     spec(constants.ISPEC_DISK_COUNT, 0),
                     spec(constants.ISPEC_NIC_COUNT, 0),
                     spec(constants.ISPEC_DISK_SIZE, []),
                     spec(constants.ISPEC_SPINDLE_USE, None))
1177
1178
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Checks whether an instance meets the specs of the new target group.

  If the instance stays within its current group, no check is done and no
  violations are reported.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  stays_in_group = (current_group == target_group)
  if stays_in_group:
    return []

  return _compute_fn(ipolicy, instance)
1196
1197
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Verifies that the target node is acceptable in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate (a node object, its group is used)
  @param ignore: Ignore violations of the ipolicy (they're only logged as
      a warning)
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if the policy is violated and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  current_group = lu.cfg.GetNodeInfo(instance.primary_node).group
  violations = _compute_fn(ipolicy, instance, current_group, node.group)

  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  lu.LogWarning(msg)
1220
1221
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  Only instances which were fine under the old policy are reported, i.e.
  the result is the set of violations I{introduced} by the policy change.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A frozenset of names of the instances which violate the new
      ipolicy but did not before

  """
  violating_before = _ComputeViolatingInstances(old_ipolicy, instances)
  violating_after = _ComputeViolatingInstances(new_ipolicy, instances)

  # New violations are those present afterwards but not before; the
  # opposite difference would yield the instances *fixed* by the change
  return violating_after - violating_before
1233
1234
1235 def _ExpandItemName(fn, name, kind):
1236   """Expand an item name.
1237
1238   @param fn: the function to use for expansion
1239   @param name: requested item name
1240   @param kind: text description ('Node' or 'Instance')
1241   @return: the resolved (full) name
1242   @raise errors.OpPrereqError: if the item is not found
1243
1244   """
1245   full_name = fn(name)
1246   if full_name is None:
1247     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1248                                errors.ECODE_NOENT)
1249   return full_name
1250
1251
def _ExpandNodeName(cfg, name):
  """Expands a node name via L{_ExpandItemName} and the configuration."""
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1255
1256
def _ExpandInstanceName(cfg, name):
  """Expands an instance name via L{_ExpandItemName} and the configuration."""
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1260
1261
1262 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1263                           minmem, maxmem, vcpus, nics, disk_template, disks,
1264                           bep, hvp, hypervisor_name, tags):
1265   """Builds instance related env variables for hooks
1266
1267   This builds the hook environment from individual variables.
1268
1269   @type name: string
1270   @param name: the name of the instance
1271   @type primary_node: string
1272   @param primary_node: the name of the instance's primary node
1273   @type secondary_nodes: list
1274   @param secondary_nodes: list of secondary nodes as strings
1275   @type os_type: string
1276   @param os_type: the name of the instance's OS
1277   @type status: string
1278   @param status: the desired status of the instance
1279   @type minmem: string
1280   @param minmem: the minimum memory size of the instance
1281   @type maxmem: string
1282   @param maxmem: the maximum memory size of the instance
1283   @type vcpus: string
1284   @param vcpus: the count of VCPUs the instance has
1285   @type nics: list
1286   @param nics: list of tuples (ip, mac, mode, link) representing
1287       the NICs the instance has
1288   @type disk_template: string
1289   @param disk_template: the disk template of the instance
1290   @type disks: list
1291   @param disks: the list of (size, mode) pairs
1292   @type bep: dict
1293   @param bep: the backend parameters for the instance
1294   @type hvp: dict
1295   @param hvp: the hypervisor parameters for the instance
1296   @type hypervisor_name: string
1297   @param hypervisor_name: the hypervisor for the instance
1298   @type tags: list
1299   @param tags: list of instance tags as strings
1300   @rtype: dict
1301   @return: the hook environment for this instance
1302
1303   """
1304   env = {
1305     "OP_TARGET": name,
1306     "INSTANCE_NAME": name,
1307     "INSTANCE_PRIMARY": primary_node,
1308     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1309     "INSTANCE_OS_TYPE": os_type,
1310     "INSTANCE_STATUS": status,
1311     "INSTANCE_MINMEM": minmem,
1312     "INSTANCE_MAXMEM": maxmem,
1313     # TODO(2.7) remove deprecated "memory" value
1314     "INSTANCE_MEMORY": maxmem,
1315     "INSTANCE_VCPUS": vcpus,
1316     "INSTANCE_DISK_TEMPLATE": disk_template,
1317     "INSTANCE_HYPERVISOR": hypervisor_name,
1318   }
1319   if nics:
1320     nic_count = len(nics)
1321     for idx, (ip, mac, mode, link) in enumerate(nics):
1322       if ip is None:
1323         ip = ""
1324       env["INSTANCE_NIC%d_IP" % idx] = ip
1325       env["INSTANCE_NIC%d_MAC" % idx] = mac
1326       env["INSTANCE_NIC%d_MODE" % idx] = mode
1327       env["INSTANCE_NIC%d_LINK" % idx] = link
1328       if mode == constants.NIC_MODE_BRIDGED:
1329         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1330   else:
1331     nic_count = 0
1332
1333   env["INSTANCE_NIC_COUNT"] = nic_count
1334
1335   if disks:
1336     disk_count = len(disks)
1337     for idx, (size, mode) in enumerate(disks):
1338       env["INSTANCE_DISK%d_SIZE" % idx] = size
1339       env["INSTANCE_DISK%d_MODE" % idx] = mode
1340   else:
1341     disk_count = 0
1342
1343   env["INSTANCE_DISK_COUNT"] = disk_count
1344
1345   if not tags:
1346     tags = []
1347
1348   env["INSTANCE_TAGS"] = " ".join(tags)
1349
1350   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1351     for key, value in source.items():
1352       env["INSTANCE_%s_%s" % (kind, key)] = value
1353
1354   return env
1355
1356
def _NICListToTuple(lu, nics):
  """Converts NIC objects into a list of nic information tuples.

  Each tuple is (ip, mac, mode, link), with mode and link taken from the
  NIC's parameters after filling them with the cluster defaults. The list
  is suitable to be passed to _BuildInstanceHookEnv or as a return value in
  LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  cluster = lu.cfg.GetClusterInfo()

  nic_tuples = []
  for nic in nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    nic_tuples.append((nic.ip, nic.mac,
                       params[constants.NIC_MODE],
                       params[constants.NIC_LINK]))

  return nic_tuples
1379
1380
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Creates the instance related hook environment from an instance object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the instance
  and its filled backend/hypervisor parameters; entries in C{override}
  replace the derived ones.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_be = cluster.FillBE(instance)
  filled_hv = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=filled_be[constants.BE_MAXMEM],
    minmem=filled_be[constants.BE_MINMEM],
    vcpus=filled_be[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=filled_be,
    hvp=filled_hv,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  hook_args.update(override or {})

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1419
1420
def _AdjustCandidatePool(lu, exceptions):
  """Brings the candidate pool up to date after node operations.

  Nodes promoted by the configuration are logged and re-added to the
  context; a note is logged if there are more candidates than desired.

  @param lu: the LU on whose behalf the adjustment is done
  @param exceptions: passed on unchanged to the configuration's
      C{MaintainCandidatePool} and C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    # The list holds node objects (see the ".name" access above)
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now <= mc_max:
    return

  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
1435
1436
1437 def _DecideSelfPromotion(lu, exceptions=None):
1438   """Decide whether I should promote myself as a master candidate.
1439
1440   """
1441   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1442   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1443   # the new node will increase mc_max with one, so:
1444   mc_should = min(mc_should + 1, cp_size)
1445   return mc_now < mc_should
1446
1447
1448 def _CalculateGroupIPolicy(cluster, group):
1449   """Calculate instance policy for group.
1450
1451   """
1452   return cluster.SimpleFillIPolicy(group.ipolicy)
1453
1454
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances which violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)

  return frozenset(violating)
1466
1467
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Verifies that the bridges needed by a list of nics exist.

  Only NICs in bridged mode (after filling their parameters with the
  cluster defaults) are considered; if there are none, the node isn't
  contacted at all.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1480
1481
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Verifies that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1489
1490
def _CheckOSVariant(os_obj, name):
  """Verifies that an OS name conforms to the os variants specification.

  An OS without supported variants must be named without a variant, while
  an OS with variants must be named with one of its supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name doesn't match the variants
      supported by the OS

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)

    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1513
1514
1515 def _GetNodeInstancesInner(cfg, fn):
1516   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1517
1518
def _GetNodeInstances(cfg, node_name):
  """Returns all instances using a node, either as primary or as secondary.

  An instance is selected if the node appears in its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1525
1526
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns the instances having the given node as their primary node.

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1533
1534
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns the instances having the given node among their secondaries.

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1541
1542
def _GetStorageTypeArgs(cfg, storage_type):
  """Computes the extra arguments needed for a storage type.

  @param cfg: the configuration object, used to look up the file storage
      directories
  @param storage_type: the storage type for which arguments are wanted
  @rtype: list
  @return: for file storage a one-element list holding the list of storage
      directories, otherwise an empty list

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1553
1554
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks which are faulty on a node.

  The mirror status of all the instance's disks is requested from the node;
  a disk counts as faulty if a status was returned for it and its local
  disk status is L{constants.LDS_FAULTY}.

  @param cfg: the configuration object, used to set the disk IDs for the
      node
  @param rpc_runner: the RPC runner used to contact the node
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed on to the RPC result's C{Raise}, which is called
      to report a failed status query
  @rtype: list
  @return: indices (positions in the instance's disk list) of the faulty
      disks

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1570
1571
1572 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1573   """Check the sanity of iallocator and node arguments and use the
1574   cluster-wide iallocator if appropriate.
1575
1576   Check that at most one of (iallocator, node) is specified. If none is
1577   specified, then the LU's opcode's iallocator slot is filled with the
1578   cluster-wide default iallocator.
1579
1580   @type iallocator_slot: string
1581   @param iallocator_slot: the name of the opcode iallocator slot
1582   @type node_slot: string
1583   @param node_slot: the name of the opcode target node slot
1584
1585   """
1586   node = getattr(lu.op, node_slot, None)
1587   iallocator = getattr(lu.op, iallocator_slot, None)
1588
1589   if node is not None and iallocator is not None:
1590     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1591                                errors.ECODE_INVAL)
1592   elif node is None and iallocator is None:
1593     default_iallocator = lu.cfg.GetDefaultIAllocator()
1594     if default_iallocator:
1595       setattr(lu.op, iallocator_slot, default_iallocator)
1596     else:
1597       raise errors.OpPrereqError("No iallocator or node given and no"
1598                                  " cluster-wide default iallocator found;"
1599                                  " please specify either an iallocator or a"
1600                                  " node, or set a cluster-wide default"
1601                                  " iallocator")
1602
1603
1604 def _GetDefaultIAllocator(cfg, iallocator):
1605   """Decides on which iallocator to use.
1606
1607   @type cfg: L{config.ConfigWriter}
1608   @param cfg: Cluster configuration object
1609   @type iallocator: string or None
1610   @param iallocator: Iallocator specified in opcode
1611   @rtype: string
1612   @return: Iallocator name
1613
1614   """
1615   if not iallocator:
1616     # Use default iallocator
1617     iallocator = cfg.GetDefaultIAllocator()
1618
1619   if not iallocator:
1620     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1621                                " opcode nor as a cluster-wide default",
1622                                errors.ECODE_INVAL)
1623
1624   return iallocator
1625
1626
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself doesn't do any work; it only provides the hooks
  environment and the node lists for the "cluster-init" hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as operation target

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty list and a list containing only the master node
        (presumably the pre- and post-hook nodes; confirm against
        L{LogicalUnit})

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always C{True}

    """
    return True
1653
1654
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as operation target

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty lists; the post hooks on the master are run
        explicitly from L{Exec}

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. that the master is the
    only node left and that there are no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and then asks it to deactivate
    the master IP address; a failure to do the latter is only logged as a
    warning.

    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    failure = result.fail_msg
    if failure:
      self.LogWarning("Error disabling the master IP address: %s",
                      failure)

    return master_name
1714
1715
1716 def _VerifyCertificate(filename):
1717   """Verifies a certificate for L{LUClusterVerifyConfig}.
1718
1719   @type filename: string
1720   @param filename: Path to PEM file
1721
1722   """
1723   try:
1724     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1725                                            utils.ReadFile(filename))
1726   except Exception, err: # pylint: disable=W0703
1727     return (LUClusterVerifyConfig.ETYPE_ERROR,
1728             "Failed to load X509 certificate %s: %s" % (filename, err))
1729
1730   (errcode, msg) = \
1731     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1732                                 constants.SSL_CERT_EXPIRATION_ERROR)
1733
1734   if msg:
1735     fnamemsg = "While verifying %s: %s" % (filename, msg)
1736   else:
1737     fnamemsg = None
1738
1739   if errcode is None:
1740     return (None, fnamemsg)
1741   elif errcode == utils.CERT_WARNING:
1742     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1743   elif errcode == utils.CERT_ERROR:
1744     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1745
1746   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1747
1748
1749 def _GetAllHypervisorParameters(cluster, instances):
1750   """Compute the set of all hypervisor parameters.
1751
1752   @type cluster: L{objects.Cluster}
1753   @param cluster: the cluster object
1754   @param instances: list of L{objects.Instance}
1755   @param instances: additional instances from which to obtain parameters
1756   @rtype: list of (origin, hypervisor, parameters)
1757   @return: a list with all parameters found, indicating the hypervisor they
1758        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1759
1760   """
1761   hvp_data = []
1762
1763   for hv_name in cluster.enabled_hypervisors:
1764     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1765
1766   for os_name, os_hvp in cluster.os_hvp.items():
1767     for hv_name, hv_params in os_hvp.items():
1768       if hv_params:
1769         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1770         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1771
1772   # TODO: collapse identical parameter values in a single one
1773   for instance in instances:
1774     if instance.hvparams:
1775       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1776                        cluster.FillHV(instance)))
1777
1778   return hvp_data
1779
1780
1781 class _VerifyErrors(object):
1782   """Mix-in for cluster/group verify LUs.
1783
1784   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1785   self.op and self._feedback_fn to be available.)
1786
1787   """
1788
1789   ETYPE_FIELD = "code"
1790   ETYPE_ERROR = "ERROR"
1791   ETYPE_WARNING = "WARNING"
1792
1793   def _Error(self, ecode, item, msg, *args, **kwargs):
1794     """Format an error message.
1795
1796     Based on the opcode's error_codes parameter, either format a
1797     parseable error code, or a simpler error string.
1798
1799     This must be called only from Exec and functions called from Exec.
1800
1801     """
1802     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1803     itype, etxt, _ = ecode
1804     # first complete the msg
1805     if args:
1806       msg = msg % args
1807     # then format the whole message
1808     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1809       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1810     else:
1811       if item:
1812         item = " " + item
1813       else:
1814         item = ""
1815       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1816     # and finally report it via the feedback_fn
1817     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1818
1819   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1820     """Log an error message if the passed condition is True.
1821
1822     """
1823     cond = (bool(cond)
1824             or self.op.debug_simulate_errors) # pylint: disable=E1101
1825
1826     # If the error code is in the list of ignored errors, demote the error to a
1827     # warning
1828     (_, etxt, _) = ecode
1829     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1830       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1831
1832     if cond:
1833       self._Error(ecode, *args, **kwargs)
1834
1835     # do not mark the operation as failed for WARN cases only
1836     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1837       self.bad = self.bad or cond
1838
1839
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  This LU does not verify anything itself; Exec only builds a list of jobs
  (each job being a list with a single opcode) and returns it wrapped in
  L{ResultWithJobs}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that this LU needs no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs.

    If the opcode names a group, a single group verification job is built
    and it has no dependencies. Otherwise one configuration verification job
    is built first, followed by one group verification job for every node
    group, each of them depending on the configuration job.

    @param feedback_fn: feedback callback; not used by this method
    @return: L{ResultWithJobs} holding the list of jobs

    """
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      # NOTE(review): the negative value looks like a job index relative to
      # the job being added; since len(jobs) is read each time the function is
      # called, this seems to rely on the generator below being consumed
      # lazily while "jobs" grows -- confirm before restructuring
      depends_fn = lambda: [(-len(jobs), [])]

    # depends_fn() is called once per group, when the generator yields the
    # corresponding job
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # Only opcodes other than the group verification one are allowed to
        # reject the skip_checks attribute
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1882
1883
1884 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1885   """Verifies the cluster config.
1886
1887   """
1888   REQ_BGL = True
1889
1890   def _VerifyHVP(self, hvp_data):
1891     """Verifies locally the syntax of the hypervisor parameters.
1892
1893     """
1894     for item, hv_name, hv_params in hvp_data:
1895       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1896              (item, hv_name))
1897       try:
1898         hv_class = hypervisor.GetHypervisor(hv_name)
1899         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1900         hv_class.CheckParameterSyntax(hv_params)
1901       except errors.GenericError, err:
1902         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1903
1904   def ExpandNames(self):
1905     # Information can be safely retrieved as the BGL is acquired in exclusive
1906     # mode
1907     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1908     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1909     self.all_node_info = self.cfg.GetAllNodesInfo()
1910     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1911     self.needed_locks = {}
1912
1913   def Exec(self, feedback_fn):
1914     """Verify integrity of cluster, performing various test on nodes.
1915
1916     """
1917     self.bad = False
1918     self._feedback_fn = feedback_fn
1919
1920     feedback_fn("* Verifying cluster config")
1921
1922     for msg in self.cfg.VerifyConfig():
1923       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1924
1925     feedback_fn("* Verifying cluster certificate files")
1926
1927     for cert_filename in constants.ALL_CERT_FILES:
1928       (errcode, msg) = _VerifyCertificate(cert_filename)
1929       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1930
1931     feedback_fn("* Verifying hypervisor parameters")
1932
1933     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1934                                                 self.all_inst_info.values()))
1935
1936     feedback_fn("* Verifying all nodes belong to an existing group")
1937
1938     # We do this verification here because, should this bogus circumstance
1939     # occur, it would never be caught by VerifyGroup, which only acts on
1940     # nodes/instances reachable from existing node groups.
1941
1942     dangling_nodes = set(node.name for node in self.all_node_info.values()
1943                          if node.group not in self.all_group_info)
1944
1945     dangling_instances = {}
1946     no_node_instances = []
1947
1948     for inst in self.all_inst_info.values():
1949       if inst.primary_node in dangling_nodes:
1950         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1951       elif inst.primary_node not in self.all_node_info:
1952         no_node_instances.append(inst.name)
1953
1954     pretty_dangling = [
1955         "%s (%s)" %
1956         (node.name,
1957          utils.CommaJoin(dangling_instances.get(node.name,
1958                                                 ["no instances"])))
1959         for node in dangling_nodes]
1960
1961     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1962                   None,
1963                   "the following nodes (and their instances) belong to a non"
1964                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1965
1966     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1967                   None,
1968                   "the following instances have a non-existing primary-node:"
1969                   " %s", utils.CommaJoin(no_node_instances))
1970
1971     return not self.bad
1972
1973
1974 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1975   """Verifies the status of a node group.
1976
1977   """
1978   HPATH = "cluster-verify"
1979   HTYPE = constants.HTYPE_CLUSTER
1980   REQ_BGL = False
1981
1982   _HOOKS_INDENT_RE = re.compile("^", re.M)
1983
1984   class NodeImage(object):
1985     """A class representing the logical and physical status of a node.
1986
1987     @type name: string
1988     @ivar name: the node name to which this object refers
1989     @ivar volumes: a structure as returned from
1990         L{ganeti.backend.GetVolumeList} (runtime)
1991     @ivar instances: a list of running instances (runtime)
1992     @ivar pinst: list of configured primary instances (config)
1993     @ivar sinst: list of configured secondary instances (config)
1994     @ivar sbp: dictionary of {primary-node: list of instances} for all
1995         instances for which this node is secondary (config)
1996     @ivar mfree: free memory, as reported by hypervisor (runtime)
1997     @ivar dfree: free disk, as reported by the node (runtime)
1998     @ivar offline: the offline status (config)
1999     @type rpc_fail: boolean
2000     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2001         not whether the individual keys were correct) (runtime)
2002     @type lvm_fail: boolean
2003     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2004     @type hyp_fail: boolean
2005     @ivar hyp_fail: whether the RPC call didn't return the instance list
2006     @type ghost: boolean
2007     @ivar ghost: whether this is a known node or not (config)
2008     @type os_fail: boolean
2009     @ivar os_fail: whether the RPC call didn't return valid OS data
2010     @type oslist: list
2011     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2012     @type vm_capable: boolean
2013     @ivar vm_capable: whether the node can host instances
2014
2015     """
2016     def __init__(self, offline=False, name=None, vm_capable=True):
2017       self.name = name
2018       self.volumes = {}
2019       self.instances = []
2020       self.pinst = []
2021       self.sinst = []
2022       self.sbp = {}
2023       self.mfree = 0
2024       self.dfree = 0
2025       self.offline = offline
2026       self.vm_capable = vm_capable
2027       self.rpc_fail = False
2028       self.lvm_fail = False
2029       self.hyp_fail = False
2030       self.ghost = False
2031       self.os_fail = False
2032       self.oslist = {}
2033
2034   def ExpandNames(self):
2035     # This raises errors.OpPrereqError on its own:
2036     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2037
2038     # Get instances in node group; this is unsafe and needs verification later
2039     inst_names = \
2040       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2041
2042     self.needed_locks = {
2043       locking.LEVEL_INSTANCE: inst_names,
2044       locking.LEVEL_NODEGROUP: [self.group_uuid],
2045       locking.LEVEL_NODE: [],
2046       }
2047
2048     self.share_locks = _ShareAll()
2049
2050   def DeclareLocks(self, level):
2051     if level == locking.LEVEL_NODE:
2052       # Get members of node group; this is unsafe and needs verification later
2053       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2054
2055       all_inst_info = self.cfg.GetAllInstancesInfo()
2056
2057       # In Exec(), we warn about mirrored instances that have primary and
2058       # secondary living in separate node groups. To fully verify that
2059       # volumes for these instances are healthy, we will need to do an
2060       # extra call to their secondaries. We ensure here those nodes will
2061       # be locked.
2062       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2063         # Important: access only the instances whose lock is owned
2064         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2065           nodes.update(all_inst_info[inst].secondary_nodes)
2066
2067       self.needed_locks[locking.LEVEL_NODE] = nodes
2068
2069   def CheckPrereq(self):
2070     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2071     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2072
2073     group_nodes = set(self.group_info.members)
2074     group_instances = \
2075       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2076
2077     unlocked_nodes = \
2078         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2079
2080     unlocked_instances = \
2081         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2082
2083     if unlocked_nodes:
2084       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2085                                  utils.CommaJoin(unlocked_nodes),
2086                                  errors.ECODE_STATE)
2087
2088     if unlocked_instances:
2089       raise errors.OpPrereqError("Missing lock for instances: %s" %
2090                                  utils.CommaJoin(unlocked_instances),
2091                                  errors.ECODE_STATE)
2092
2093     self.all_node_info = self.cfg.GetAllNodesInfo()
2094     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2095
2096     self.my_node_names = utils.NiceSort(group_nodes)
2097     self.my_inst_names = utils.NiceSort(group_instances)
2098
2099     self.my_node_info = dict((name, self.all_node_info[name])
2100                              for name in self.my_node_names)
2101
2102     self.my_inst_info = dict((name, self.all_inst_info[name])
2103                              for name in self.my_inst_names)
2104
2105     # We detect here the nodes that will need the extra RPC calls for verifying
2106     # split LV volumes; they should be locked.
2107     extra_lv_nodes = set()
2108
2109     for inst in self.my_inst_info.values():
2110       if inst.disk_template in constants.DTS_INT_MIRROR:
2111         for nname in inst.all_nodes:
2112           if self.all_node_info[nname].group != self.group_uuid:
2113             extra_lv_nodes.add(nname)
2114
2115     unlocked_lv_nodes = \
2116         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2117
2118     if unlocked_lv_nodes:
2119       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2120                                  utils.CommaJoin(unlocked_lv_nodes),
2121                                  errors.ECODE_STATE)
2122     self.extra_lv_nodes = list(extra_lv_nodes)
2123
2124   def _VerifyNode(self, ninfo, nresult):
2125     """Perform some basic validation on data returned from a node.
2126
2127       - check the result data structure is well formed and has all the
2128         mandatory fields
2129       - check ganeti version
2130
2131     @type ninfo: L{objects.Node}
2132     @param ninfo: the node to check
2133     @param nresult: the results from the node
2134     @rtype: boolean
2135     @return: whether overall this call was successful (and we can expect
2136          reasonable values in the respose)
2137
2138     """
2139     node = ninfo.name
2140     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2141
2142     # main result, nresult should be a non-empty dict
2143     test = not nresult or not isinstance(nresult, dict)
2144     _ErrorIf(test, constants.CV_ENODERPC, node,
2145                   "unable to verify node: no data returned")
2146     if test:
2147       return False
2148
2149     # compares ganeti version
2150     local_version = constants.PROTOCOL_VERSION
2151     remote_version = nresult.get("version", None)
2152     test = not (remote_version and
2153                 isinstance(remote_version, (list, tuple)) and
2154                 len(remote_version) == 2)
2155     _ErrorIf(test, constants.CV_ENODERPC, node,
2156              "connection to node returned invalid data")
2157     if test:
2158       return False
2159
2160     test = local_version != remote_version[0]
2161     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2162              "incompatible protocol versions: master %s,"
2163              " node %s", local_version, remote_version[0])
2164     if test:
2165       return False
2166
2167     # node seems compatible, we can actually try to look into its results
2168
2169     # full package version
2170     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2171                   constants.CV_ENODEVERSION, node,
2172                   "software version mismatch: master %s, node %s",
2173                   constants.RELEASE_VERSION, remote_version[1],
2174                   code=self.ETYPE_WARNING)
2175
2176     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2177     if ninfo.vm_capable and isinstance(hyp_result, dict):
2178       for hv_name, hv_result in hyp_result.iteritems():
2179         test = hv_result is not None
2180         _ErrorIf(test, constants.CV_ENODEHV, node,
2181                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2182
2183     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2184     if ninfo.vm_capable and isinstance(hvp_result, list):
2185       for item, hv_name, hv_result in hvp_result:
2186         _ErrorIf(True, constants.CV_ENODEHV, node,
2187                  "hypervisor %s parameter verify failure (source %s): %s",
2188                  hv_name, item, hv_result)
2189
2190     test = nresult.get(constants.NV_NODESETUP,
2191                        ["Missing NODESETUP results"])
2192     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2193              "; ".join(test))
2194
2195     return True
2196
2197   def _VerifyNodeTime(self, ninfo, nresult,
2198                       nvinfo_starttime, nvinfo_endtime):
2199     """Check the node time.
2200
2201     @type ninfo: L{objects.Node}
2202     @param ninfo: the node to check
2203     @param nresult: the remote results for the node
2204     @param nvinfo_starttime: the start time of the RPC call
2205     @param nvinfo_endtime: the end time of the RPC call
2206
2207     """
2208     node = ninfo.name
2209     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2210
2211     ntime = nresult.get(constants.NV_TIME, None)
2212     try:
2213       ntime_merged = utils.MergeTime(ntime)
2214     except (ValueError, TypeError):
2215       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2216       return
2217
2218     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2219       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2220     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2221       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2222     else:
2223       ntime_diff = None
2224
2225     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2226              "Node time diverges by at least %s from master node time",
2227              ntime_diff)
2228
2229   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2230     """Check the node LVM results.
2231
2232     @type ninfo: L{objects.Node}
2233     @param ninfo: the node to check
2234     @param nresult: the remote results for the node
2235     @param vg_name: the configured VG name
2236
2237     """
2238     if vg_name is None:
2239       return
2240
2241     node = ninfo.name
2242     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2243
2244     # checks vg existence and size > 20G
2245     vglist = nresult.get(constants.NV_VGLIST, None)
2246     test = not vglist
2247     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2248     if not test:
2249       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2250                                             constants.MIN_VG_SIZE)
2251       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2252
2253     # check pv names
2254     pvlist = nresult.get(constants.NV_PVLIST, None)
2255     test = pvlist is None
2256     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2257     if not test:
2258       # check that ':' is not present in PV names, since it's a
2259       # special character for lvcreate (denotes the range of PEs to
2260       # use on the PV)
2261       for _, pvname, owner_vg in pvlist:
2262         test = ":" in pvname
2263         _ErrorIf(test, constants.CV_ENODELVM, node,
2264                  "Invalid character ':' in PV '%s' of VG '%s'",
2265                  pvname, owner_vg)
2266
2267   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2268     """Check the node bridges.
2269
2270     @type ninfo: L{objects.Node}
2271     @param ninfo: the node to check
2272     @param nresult: the remote results for the node
2273     @param bridges: the expected list of bridges
2274
2275     """
2276     if not bridges:
2277       return
2278
2279     node = ninfo.name
2280     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2281
2282     missing = nresult.get(constants.NV_BRIDGES, None)
2283     test = not isinstance(missing, list)
2284     _ErrorIf(test, constants.CV_ENODENET, node,
2285              "did not return valid bridge information")
2286     if not test:
2287       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2288                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2289
2290   def _VerifyNodeUserScripts(self, ninfo, nresult):
2291     """Check the results of user scripts presence and executability on the node
2292
2293     @type ninfo: L{objects.Node}
2294     @param ninfo: the node to check
2295     @param nresult: the remote results for the node
2296
2297     """
2298     node = ninfo.name
2299
2300     test = not constants.NV_USERSCRIPTS in nresult
2301     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2302                   "did not return user scripts information")
2303
2304     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2305     if not test:
2306       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2307                     "user scripts not present or not executable: %s" %
2308                     utils.CommaJoin(sorted(broken_scripts)))
2309
2310   def _VerifyNodeNetwork(self, ninfo, nresult):
2311     """Check the node network connectivity results.
2312
2313     @type ninfo: L{objects.Node}
2314     @param ninfo: the node to check
2315     @param nresult: the remote results for the node
2316
2317     """
2318     node = ninfo.name
2319     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2320
2321     test = constants.NV_NODELIST not in nresult
2322     _ErrorIf(test, constants.CV_ENODESSH, node,
2323              "node hasn't returned node ssh connectivity data")
2324     if not test:
2325       if nresult[constants.NV_NODELIST]:
2326         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2327           _ErrorIf(True, constants.CV_ENODESSH, node,
2328                    "ssh communication with node '%s': %s", a_node, a_msg)
2329
2330     test = constants.NV_NODENETTEST not in nresult
2331     _ErrorIf(test, constants.CV_ENODENET, node,
2332              "node hasn't returned node tcp connectivity data")
2333     if not test:
2334       if nresult[constants.NV_NODENETTEST]:
2335         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2336         for anode in nlist:
2337           _ErrorIf(True, constants.CV_ENODENET, node,
2338                    "tcp communication with node '%s': %s",
2339                    anode, nresult[constants.NV_NODENETTEST][anode])
2340
2341     test = constants.NV_MASTERIP not in nresult
2342     _ErrorIf(test, constants.CV_ENODENET, node,
2343              "node hasn't returned node master IP reachability data")
2344     if not test:
2345       if not nresult[constants.NV_MASTERIP]:
2346         if node == self.master_node:
2347           msg = "the master node cannot reach the master IP (not configured?)"
2348         else:
2349           msg = "cannot reach the master IP"
2350         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2351
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node. It also checks the instance against
    the group's instance policy, whether an instance which should be up is
    running on its primary node, and the status of its disks.

    @param instance: the instance name, used for reporting and looked up in
        the list of instances running on the primary node
    @param instanceconfig: the configuration object of the instance
    @param node_image: dictionary mapping node names to the objects holding
        their status (see L{NodeImage})
    @param diskstatus: dictionary mapping node names to a list of
        (success, status) pairs, one per disk

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    # filled in by MapLVsByNode; iterated below as node name -> volumes
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    # NOTE(review): "err" is passed as the message unformatted; if it is a
    # list of violations it will be shown in its list representation --
    # confirm against _ComputeIPolicyInstanceViolation
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      # an offline primary node is not expected to list the instance
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    # flatten the per-node lists into (node, success, status, disk index)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      # disk problems are only reported for instances which should be up;
      # failures to get the status from ghost or offline nodes are ignored
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
2405
2406   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2407     """Verify if there are any unknown volumes in the cluster.
2408
2409     The .os, .swap and backup volumes are ignored. All other volumes are
2410     reported as unknown.
2411
2412     @type reserved: L{ganeti.utils.FieldSet}
2413     @param reserved: a FieldSet of reserved volume names
2414
2415     """
2416     for node, n_img in node_image.items():
2417       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2418           self.all_node_info[node].group != self.group_uuid):
2419         # skip non-healthy nodes
2420         continue
2421       for volume in n_img.volumes:
2422         test = ((node not in node_vol_should or
2423                 volume not in node_vol_should[node]) and
2424                 not reserved.Matches(volume))
2425         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2426                       "volume %s is unknown", volume)
2427
2428   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2429     """Verify N+1 Memory Resilience.
2430
2431     Check that if one single node dies we can still start all the
2432     instances it was primary for.
2433
2434     """
2435     cluster_info = self.cfg.GetClusterInfo()
2436     for node, n_img in node_image.items():
2437       # This code checks that every node which is now listed as
2438       # secondary has enough memory to host all instances it is
2439       # supposed to should a single other node in the cluster fail.
2440       # FIXME: not ready for failover to an arbitrary node
2441       # FIXME: does not support file-backed instances
2442       # WARNING: we currently take into account down instances as well
2443       # as up ones, considering that even if they're down someone
2444       # might want to start them even in the event of a node failure.
2445       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2446         # we're skipping nodes marked offline and nodes in other groups from
2447         # the N+1 warning, since most likely we don't have good memory
2448         # infromation from them; we already list instances living on such
2449         # nodes, and that's enough warning
2450         continue
2451       #TODO(dynmem): also consider ballooning out other instances
2452       for prinode, instances in n_img.sbp.items():
2453         needed_mem = 0
2454         for instance in instances:
2455           bep = cluster_info.FillBE(instance_cfg[instance])
2456           if bep[constants.BE_AUTO_BALANCE]:
2457             needed_mem += bep[constants.BE_MINMEM]
2458         test = n_img.mfree < needed_mem
2459         self._ErrorIf(test, constants.CV_ENODEN1, node,
2460                       "not enough memory to accomodate instance failovers"
2461                       " should node %s fail (%dMiB needed, %dMiB available)",
2462                       prinode, needed_mem, n_img.mfree)
2463
2464   @classmethod
2465   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2466                    (files_all, files_opt, files_mc, files_vm)):
2467     """Verifies file checksums collected from all nodes.
2468
2469     @param errorif: Callback for reporting errors
2470     @param nodeinfo: List of L{objects.Node} objects
2471     @param master_node: Name of master node
2472     @param all_nvinfo: RPC results
2473
2474     """
2475     # Define functions determining which nodes to consider for a file
2476     files2nodefn = [
2477       (files_all, None),
2478       (files_mc, lambda node: (node.master_candidate or
2479                                node.name == master_node)),
2480       (files_vm, lambda node: node.vm_capable),
2481       ]
2482
2483     # Build mapping from filename to list of nodes which should have the file
2484     nodefiles = {}
2485     for (files, fn) in files2nodefn:
2486       if fn is None:
2487         filenodes = nodeinfo
2488       else:
2489         filenodes = filter(fn, nodeinfo)
2490       nodefiles.update((filename,
2491                         frozenset(map(operator.attrgetter("name"), filenodes)))
2492                        for filename in files)
2493
2494     assert set(nodefiles) == (files_all | files_mc | files_vm)
2495
2496     fileinfo = dict((filename, {}) for filename in nodefiles)
2497     ignore_nodes = set()
2498
2499     for node in nodeinfo:
2500       if node.offline:
2501         ignore_nodes.add(node.name)
2502         continue
2503
2504       nresult = all_nvinfo[node.name]
2505
2506       if nresult.fail_msg or not nresult.payload:
2507         node_files = None
2508       else:
2509         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2510
2511       test = not (node_files and isinstance(node_files, dict))
2512       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2513               "Node did not return file checksum data")
2514       if test:
2515         ignore_nodes.add(node.name)
2516         continue
2517
2518       # Build per-checksum mapping from filename to nodes having it
2519       for (filename, checksum) in node_files.items():
2520         assert filename in nodefiles
2521         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2522
2523     for (filename, checksums) in fileinfo.items():
2524       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2525
2526       # Nodes having the file
2527       with_file = frozenset(node_name
2528                             for nodes in fileinfo[filename].values()
2529                             for node_name in nodes) - ignore_nodes
2530
2531       expected_nodes = nodefiles[filename] - ignore_nodes
2532
2533       # Nodes missing file
2534       missing_file = expected_nodes - with_file
2535
2536       if filename in files_opt:
2537         # All or no nodes
2538         errorif(missing_file and missing_file != expected_nodes,
2539                 constants.CV_ECLUSTERFILECHECK, None,
2540                 "File %s is optional, but it must exist on all or no"
2541                 " nodes (not found on %s)",
2542                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2543       else:
2544         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2545                 "File %s is missing from node(s) %s", filename,
2546                 utils.CommaJoin(utils.NiceSort(missing_file)))
2547
2548         # Warn if a node has a file it shouldn't
2549         unexpected = with_file - expected_nodes
2550         errorif(unexpected,
2551                 constants.CV_ECLUSTERFILECHECK, None,
2552                 "File %s should not exist on node(s) %s",
2553                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2554
2555       # See if there are multiple versions of the file
2556       test = len(checksums) > 1
2557       if test:
2558         variants = ["variant %s on %s" %
2559                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2560                     for (idx, (checksum, nodes)) in
2561                       enumerate(sorted(checksums.items()))]
2562       else:
2563         variants = []
2564
2565       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2566               "File %s found with %s different checksums (%s)",
2567               filename, len(checksums), "; ".join(variants))
2568
2569   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2570                       drbd_map):
2571     """Verifies and the node DRBD status.
2572
2573     @type ninfo: L{objects.Node}
2574     @param ninfo: the node to check
2575     @param nresult: the remote results for the node
2576     @param instanceinfo: the dict of instances
2577     @param drbd_helper: the configured DRBD usermode helper
2578     @param drbd_map: the DRBD map as returned by
2579         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2580
2581     """
2582     node = ninfo.name
2583     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2584
2585     if drbd_helper:
2586       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2587       test = (helper_result == None)
2588       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589                "no drbd usermode helper returned")
2590       if helper_result:
2591         status, payload = helper_result
2592         test = not status
2593         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2594                  "drbd usermode helper check unsuccessful: %s", payload)
2595         test = status and (payload != drbd_helper)
2596         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2597                  "wrong drbd usermode helper: %s", payload)
2598
2599     # compute the DRBD minors
2600     node_drbd = {}
2601     for minor, instance in drbd_map[node].items():
2602       test = instance not in instanceinfo
2603       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2604                "ghost instance '%s' in temporary DRBD map", instance)
2605         # ghost instance should not be running, but otherwise we
2606         # don't give double warnings (both ghost instance and
2607         # unallocated minor in use)
2608       if test:
2609         node_drbd[minor] = (instance, False)
2610       else:
2611         instance = instanceinfo[instance]
2612         node_drbd[minor] = (instance.name,
2613                             instance.admin_state == constants.ADMINST_UP)
2614
2615     # and now check them
2616     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2617     test = not isinstance(used_minors, (tuple, list))
2618     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2619              "cannot parse drbd status file: %s", str(used_minors))
2620     if test:
2621       # we cannot check drbd status
2622       return
2623
2624     for minor, (iname, must_exist) in node_drbd.items():
2625       test = minor not in used_minors and must_exist
2626       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2627                "drbd minor %d of instance %s is not active", minor, iname)
2628     for minor in used_minors:
2629       test = minor not in node_drbd
2630       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2631                "unallocated drbd minor %d is in use", minor)
2632
2633   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2634     """Builds the node OS structures.
2635
2636     @type ninfo: L{objects.Node}
2637     @param ninfo: the node to check
2638     @param nresult: the remote results for the node
2639     @param nimg: the node image object
2640
2641     """
2642     node = ninfo.name
2643     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644
2645     remote_os = nresult.get(constants.NV_OSLIST, None)
2646     test = (not isinstance(remote_os, list) or
2647             not compat.all(isinstance(v, list) and len(v) == 7
2648                            for v in remote_os))
2649
2650     _ErrorIf(test, constants.CV_ENODEOS, node,
2651              "node hasn't returned valid OS data")
2652
2653     nimg.os_fail = test
2654
2655     if test:
2656       return
2657
2658     os_dict = {}
2659
2660     for (name, os_path, status, diagnose,
2661          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2662
2663       if name not in os_dict:
2664         os_dict[name] = []
2665
2666       # parameters is a list of lists instead of list of tuples due to
2667       # JSON lacking a real tuple type, fix it:
2668       parameters = [tuple(v) for v in parameters]
2669       os_dict[name].append((os_path, status, diagnose,
2670                             set(variants), set(parameters), set(api_ver)))
2671
2672     nimg.oslist = os_dict
2673
2674   def _VerifyNodeOS(self, ninfo, nimg, base):
2675     """Verifies the node OS list.
2676
2677     @type ninfo: L{objects.Node}
2678     @param ninfo: the node to check
2679     @param nimg: the node image object
2680     @param base: the 'template' node we match against (e.g. from the master)
2681
2682     """
2683     node = ninfo.name
2684     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2685
2686     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2687
2688     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2689     for os_name, os_data in nimg.oslist.items():
2690       assert os_data, "Empty OS status for OS %s?!" % os_name
2691       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2692       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2693                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2694       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2695                "OS '%s' has multiple entries (first one shadows the rest): %s",
2696                os_name, utils.CommaJoin([v[0] for v in os_data]))
2697       # comparisons with the 'base' image
2698       test = os_name not in base.oslist
2699       _ErrorIf(test, constants.CV_ENODEOS, node,
2700                "Extra OS %s not present on reference node (%s)",
2701                os_name, base.name)
2702       if test:
2703         continue
2704       assert base.oslist[os_name], "Base node has empty OS status?"
2705       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2706       if not b_status:
2707         # base OS is invalid, skipping
2708         continue
2709       for kind, a, b in [("API version", f_api, b_api),
2710                          ("variants list", f_var, b_var),
2711                          ("parameters", beautify_params(f_param),
2712                           beautify_params(b_param))]:
2713         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2714                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2715                  kind, os_name, base.name,
2716                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2717
2718     # check any missing OSes
2719     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2720     _ErrorIf(missing, constants.CV_ENODEOS, node,
2721              "OSes present on reference node %s but missing on this node: %s",
2722              base.name, utils.CommaJoin(missing))
2723
2724   def _VerifyOob(self, ninfo, nresult):
2725     """Verifies out of band functionality of a node.
2726
2727     @type ninfo: L{objects.Node}
2728     @param ninfo: the node to check
2729     @param nresult: the remote results for the node
2730
2731     """
2732     node = ninfo.name
2733     # We just have to verify the paths on master and/or master candidates
2734     # as the oob helper is invoked on the master
2735     if ((ninfo.master_candidate or ninfo.master_capable) and
2736         constants.NV_OOB_PATHS in nresult):
2737       for path_result in nresult[constants.NV_OOB_PATHS]:
2738         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2739
2740   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2741     """Verifies and updates the node volume data.
2742
2743     This function will update a L{NodeImage}'s internal structures
2744     with data from the remote call.
2745
2746     @type ninfo: L{objects.Node}
2747     @param ninfo: the node to check
2748     @param nresult: the remote results for the node
2749     @param nimg: the node image object
2750     @param vg_name: the configured VG name
2751
2752     """
2753     node = ninfo.name
2754     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2755
2756     nimg.lvm_fail = True
2757     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2758     if vg_name is None:
2759       pass
2760     elif isinstance(lvdata, basestring):
2761       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2762                utils.SafeEncode(lvdata))
2763     elif not isinstance(lvdata, dict):
2764       _ErrorIf(True, constants.CV_ENODELVM, node,
2765                "rpc call to node failed (lvlist)")
2766     else:
2767       nimg.volumes = lvdata
2768       nimg.lvm_fail = False
2769
2770   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2771     """Verifies and updates the node instance list.
2772
2773     If the listing was successful, then updates this node's instance
2774     list. Otherwise, it marks the RPC call as failed for the instance
2775     list key.
2776
2777     @type ninfo: L{objects.Node}
2778     @param ninfo: the node to check
2779     @param nresult: the remote results for the node
2780     @param nimg: the node image object
2781
2782     """
2783     idata = nresult.get(constants.NV_INSTANCELIST, None)
2784     test = not isinstance(idata, list)
2785     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2786                   "rpc call to node failed (instancelist): %s",
2787                   utils.SafeEncode(str(idata)))
2788     if test:
2789       nimg.hyp_fail = True
2790     else:
2791       nimg.instances = idata
2792
2793   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2794     """Verifies and computes a node information map
2795
2796     @type ninfo: L{objects.Node}
2797     @param ninfo: the node to check
2798     @param nresult: the remote results for the node
2799     @param nimg: the node image object
2800     @param vg_name: the configured VG name
2801
2802     """
2803     node = ninfo.name
2804     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2805
2806     # try to read free memory (from the hypervisor)
2807     hv_info = nresult.get(constants.NV_HVINFO, None)
2808     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2809     _ErrorIf(test, constants.CV_ENODEHV, node,
2810              "rpc call to node failed (hvinfo)")
2811     if not test:
2812       try:
2813         nimg.mfree = int(hv_info["memory_free"])
2814       except (ValueError, TypeError):
2815         _ErrorIf(True, constants.CV_ENODERPC, node,
2816                  "node returned invalid nodeinfo, check hypervisor")
2817
2818     # FIXME: devise a free space model for file based instances as well
2819     if vg_name is not None:
2820       test = (constants.NV_VGLIST not in nresult or
2821               vg_name not in nresult[constants.NV_VGLIST])
2822       _ErrorIf(test, constants.CV_ENODELVM, node,
2823                "node didn't return data for the volume group '%s'"
2824                " - it is either missing or broken", vg_name)
2825       if not test:
2826         try:
2827           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2828         except (ValueError, TypeError):
2829           _ErrorIf(True, constants.CV_ENODERPC, node,
2830                    "node returned invalid LVM info, check LVM status")
2831
2832   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2833     """Gets per-disk status information for all instances.
2834
2835     @type nodelist: list of strings
2836     @param nodelist: Node names
2837     @type node_image: dict of (name, L{objects.Node})
2838     @param node_image: Node objects
2839     @type instanceinfo: dict of (name, L{objects.Instance})
2840     @param instanceinfo: Instance objects
2841     @rtype: {instance: {node: [(succes, payload)]}}
2842     @return: a dictionary of per-instance dictionaries with nodes as
2843         keys and disk information as values; the disk information is a
2844         list of tuples (success, payload)
2845
2846     """
2847     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2848
2849     node_disks = {}
2850     node_disks_devonly = {}
2851     diskless_instances = set()
2852     diskless = constants.DT_DISKLESS
2853
2854     for nname in nodelist:
2855       node_instances = list(itertools.chain(node_image[nname].pinst,
2856                                             node_image[nname].sinst))
2857       diskless_instances.update(inst for inst in node_instances
2858                                 if instanceinfo[inst].disk_template == diskless)
2859       disks = [(inst, disk)
2860                for inst in node_instances
2861                for disk in instanceinfo[inst].disks]
2862
2863       if not disks:
2864         # No need to collect data
2865         continue
2866
2867       node_disks[nname] = disks
2868
2869       # Creating copies as SetDiskID below will modify the objects and that can
2870       # lead to incorrect data returned from nodes
2871       devonly = [dev.Copy() for (_, dev) in disks]
2872
2873       for dev in devonly:
2874         self.cfg.SetDiskID(dev, nname)
2875
2876       node_disks_devonly[nname] = devonly
2877
2878     assert len(node_disks) == len(node_disks_devonly)
2879
2880     # Collect data from all nodes with disks
2881     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2882                                                           node_disks_devonly)
2883
2884     assert len(result) == len(node_disks)
2885
2886     instdisk = {}
2887
2888     for (nname, nres) in result.items():
2889       disks = node_disks[nname]
2890
2891       if nres.offline:
2892         # No data from this node
2893         data = len(disks) * [(False, "node offline")]
2894       else:
2895         msg = nres.fail_msg
2896         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2897                  "while getting disk information: %s", msg)
2898         if msg:
2899           # No data from this node
2900           data = len(disks) * [(False, msg)]
2901         else:
2902           data = []
2903           for idx, i in enumerate(nres.payload):
2904             if isinstance(i, (tuple, list)) and len(i) == 2:
2905               data.append(i)
2906             else:
2907               logging.warning("Invalid result from node %s, entry %d: %s",
2908                               nname, idx, i)
2909               data.append((False, "Invalid result from the remote node"))
2910
2911       for ((inst, _), status) in zip(disks, data):
2912         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2913
2914     # Add empty entries for diskless instances.
2915     for inst in diskless_instances:
2916       assert inst not in instdisk
2917       instdisk[inst] = {}
2918
2919     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2920                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2921                       compat.all(isinstance(s, (tuple, list)) and
2922                                  len(s) == 2 for s in statuses)
2923                       for inst, nnames in instdisk.items()
2924                       for nname, statuses in nnames.items())
2925     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2926
2927     return instdisk
2928
2929   @staticmethod
2930   def _SshNodeSelector(group_uuid, all_nodes):
2931     """Create endless iterators for all potential SSH check hosts.
2932
2933     """
2934     nodes = [node for node in all_nodes
2935              if (node.group != group_uuid and
2936                  not node.offline)]
2937     keyfunc = operator.attrgetter("group")
2938
2939     return map(itertools.cycle,
2940                [sorted(map(operator.attrgetter("name"), names))
2941                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2942                                                   keyfunc)])
2943
2944   @classmethod
2945   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2946     """Choose which nodes should talk to which other nodes.
2947
2948     We will make nodes contact all nodes in their group, and one node from
2949     every other group.
2950
2951     @warning: This algorithm has a known issue if one node group is much
2952       smaller than others (e.g. just one node). In such a case all other
2953       nodes will talk to the single node.
2954
2955     """
2956     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2957     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2958
2959     return (online_nodes,
2960             dict((name, sorted([i.next() for i in sel]))
2961                  for name in online_nodes))
2962
2963   def BuildHooksEnv(self):
2964     """Build hooks env.
2965
2966     Cluster-Verify hooks just ran in the post phase and their failure makes
2967     the output be logged in the verify output and the verification to fail.
2968
2969     """
2970     env = {
2971       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2972       }
2973
2974     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2975                for node in self.my_node_info.values())
2976
2977     return env
2978
2979   def BuildHooksNodes(self):
2980     """Build hooks nodes.
2981
2982     """
2983     return ([], self.my_node_names)
2984
2985   def Exec(self, feedback_fn):
2986     """Verify integrity of the node group, performing various test on nodes.
2987
2988     """
2989     # This method has too many local variables. pylint: disable=R0914
2990     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2991
2992     if not self.my_node_names:
2993       # empty node group
2994       feedback_fn("* Empty node group, skipping verification")
2995       return True
2996
2997     self.bad = False
2998     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2999     verbose = self.op.verbose
3000     self._feedback_fn = feedback_fn
3001
3002     vg_name = self.cfg.GetVGName()
3003     drbd_helper = self.cfg.GetDRBDHelper()
3004     cluster = self.cfg.GetClusterInfo()
3005     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3006     hypervisors = cluster.enabled_hypervisors
3007     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3008
3009     i_non_redundant = [] # Non redundant instances
3010     i_non_a_balanced = [] # Non auto-balanced instances
3011     i_offline = 0 # Count of offline instances
3012     n_offline = 0 # Count of offline nodes
3013     n_drained = 0 # Count of nodes being drained
3014     node_vol_should = {}
3015
3016     # FIXME: verify OS list
3017
3018     # File verification
3019     filemap = _ComputeAncillaryFiles(cluster, False)
3020
3021     # do local checksums
3022     master_node = self.master_node = self.cfg.GetMasterNode()
3023     master_ip = self.cfg.GetMasterIP()
3024
3025     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3026
3027     user_scripts = []
3028     if self.cfg.GetUseExternalMipScript():
3029       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3030
3031     node_verify_param = {
3032       constants.NV_FILELIST:
3033         utils.UniqueSequence(filename
3034                              for files in filemap
3035                              for filename in files),
3036       constants.NV_NODELIST:
3037         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3038                                   self.all_node_info.values()),
3039       constants.NV_HYPERVISOR: hypervisors,
3040       constants.NV_HVPARAMS:
3041         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3042       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3043                                  for node in node_data_list
3044                                  if not node.offline],
3045       constants.NV_INSTANCELIST: hypervisors,
3046       constants.NV_VERSION: None,
3047       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3048       constants.NV_NODESETUP: None,
3049       constants.NV_TIME: None,
3050       constants.NV_MASTERIP: (master_node, master_ip),
3051       constants.NV_OSLIST: None,
3052       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3053       constants.NV_USERSCRIPTS: user_scripts,
3054       }
3055
3056     if vg_name is not None:
3057       node_verify_param[constants.NV_VGLIST] = None
3058       node_verify_param[constants.NV_LVLIST] = vg_name
3059       node_verify_param[constants.NV_PVLIST] = [vg_name]
3060       node_verify_param[constants.NV_DRBDLIST] = None
3061
3062     if drbd_helper:
3063       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3064
3065     # bridge checks
3066     # FIXME: this needs to be changed per node-group, not cluster-wide
3067     bridges = set()
3068     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3069     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070       bridges.add(default_nicpp[constants.NIC_LINK])
3071     for instance in self.my_inst_info.values():
3072       for nic in instance.nics:
3073         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3074         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3075           bridges.add(full_nic[constants.NIC_LINK])
3076
3077     if bridges:
3078       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3079
3080     # Build our expected cluster state
3081     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3082                                                  name=node.name,
3083                                                  vm_capable=node.vm_capable))
3084                       for node in node_data_list)
3085
3086     # Gather OOB paths
3087     oob_paths = []
3088     for node in self.all_node_info.values():
3089       path = _SupportsOob(self.cfg, node)
3090       if path and path not in oob_paths:
3091         oob_paths.append(path)
3092
3093     if oob_paths:
3094       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3095
3096     for instance in self.my_inst_names:
3097       inst_config = self.my_inst_info[instance]
3098
3099       for nname in inst_config.all_nodes:
3100         if nname not in node_image:
3101           gnode = self.NodeImage(name=nname)
3102           gnode.ghost = (nname not in self.all_node_info)
3103           node_image[nname] = gnode
3104
3105       inst_config.MapLVsByNode(node_vol_should)
3106
3107       pnode = inst_config.primary_node
3108       node_image[pnode].pinst.append(instance)
3109
3110       for snode in inst_config.secondary_nodes:
3111         nimg = node_image[snode]
3112         nimg.sinst.append(instance)
3113         if pnode not in nimg.sbp:
3114           nimg.sbp[pnode] = []
3115         nimg.sbp[pnode].append(instance)
3116
3117     # At this point, we have the in-memory data structures complete,
3118     # except for the runtime information, which we'll gather next
3119
3120     # Due to the way our RPC system works, exact response times cannot be
3121     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3122     # time before and after executing the request, we can at least have a time
3123     # window.
3124     nvinfo_starttime = time.time()
3125     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3126                                            node_verify_param,
3127                                            self.cfg.GetClusterName())
3128     nvinfo_endtime = time.time()
3129
3130     if self.extra_lv_nodes and vg_name is not None:
3131       extra_lv_nvinfo = \
3132           self.rpc.call_node_verify(self.extra_lv_nodes,
3133                                     {constants.NV_LVLIST: vg_name},
3134                                     self.cfg.GetClusterName())
3135     else:
3136       extra_lv_nvinfo = {}
3137
3138     all_drbd_map = self.cfg.ComputeDRBDMap()
3139
3140     feedback_fn("* Gathering disk information (%s nodes)" %
3141                 len(self.my_node_names))
3142     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3143                                      self.my_inst_info)
3144
3145     feedback_fn("* Verifying configuration file consistency")
3146
3147     # If not all nodes are being checked, we need to make sure the master node
3148     # and a non-checked vm_capable node are in the list.
3149     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3150     if absent_nodes:
3151       vf_nvinfo = all_nvinfo.copy()
3152       vf_node_info = list(self.my_node_info.values())
3153       additional_nodes = []
3154       if master_node not in self.my_node_info:
3155         additional_nodes.append(master_node)
3156         vf_node_info.append(self.all_node_info[master_node])
3157       # Add the first vm_capable node we find which is not included
3158       for node in absent_nodes:
3159         nodeinfo = self.all_node_info[node]
3160         if nodeinfo.vm_capable and not nodeinfo.offline:
3161           additional_nodes.append(node)
3162           vf_node_info.append(self.all_node_info[node])
3163           break
3164       key = constants.NV_FILELIST
3165       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3166                                                  {key: node_verify_param[key]},
3167                                                  self.cfg.GetClusterName()))
3168     else:
3169       vf_nvinfo = all_nvinfo
3170       vf_node_info = self.my_node_info.values()
3171
3172     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3173
3174     feedback_fn("* Verifying node status")
3175
3176     refos_img = None
3177
3178     for node_i in node_data_list:
3179       node = node_i.name
3180       nimg = node_image[node]
3181
3182       if node_i.offline:
3183         if verbose:
3184           feedback_fn("* Skipping offline node %s" % (node,))
3185         n_offline += 1
3186         continue
3187
3188       if node == master_node:
3189         ntype = "master"
3190       elif node_i.master_candidate:
3191         ntype = "master candidate"
3192       elif node_i.drained:
3193         ntype = "drained"
3194         n_drained += 1
3195       else:
3196         ntype = "regular"
3197       if verbose:
3198         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3199
3200       msg = all_nvinfo[node].fail_msg
3201       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3202                msg)
3203       if msg:
3204         nimg.rpc_fail = True
3205         continue
3206
3207       nresult = all_nvinfo[node].payload
3208
3209       nimg.call_ok = self._VerifyNode(node_i, nresult)
3210       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3211       self._VerifyNodeNetwork(node_i, nresult)
3212       self._VerifyNodeUserScripts(node_i, nresult)
3213       self._VerifyOob(node_i, nresult)
3214
3215       if nimg.vm_capable:
3216         self._VerifyNodeLVM(node_i, nresult, vg_name)
3217         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3218                              all_drbd_map)
3219
3220         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3221         self._UpdateNodeInstances(node_i, nresult, nimg)
3222         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3223         self._UpdateNodeOS(node_i, nresult, nimg)
3224
3225         if not nimg.os_fail:
3226           if refos_img is None:
3227             refos_img = nimg
3228           self._VerifyNodeOS(node_i, nimg, refos_img)
3229         self._VerifyNodeBridges(node_i, nresult, bridges)
3230
3231         # Check whether all running instancies are primary for the node. (This
3232         # can no longer be done from _VerifyInstance below, since some of the
3233         # wrong instances could be from other node groups.)
3234         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3235
3236         for inst in non_primary_inst:
3237           # FIXME: investigate best way to handle offline insts
3238           if inst.admin_state == constants.ADMINST_OFFLINE:
3239             if verbose:
3240               feedback_fn("* Skipping offline instance %s" % inst.name)
3241             i_offline += 1
3242             continue
3243           test = inst in self.all_inst_info
3244           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3245                    "instance should not run on node %s", node_i.name)
3246           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3247                    "node is running unknown instance %s", inst)
3248
3249     for node, result in extra_lv_nvinfo.items():
3250       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3251                               node_image[node], vg_name)
3252
3253     feedback_fn("* Verifying instance status")
3254     for instance in self.my_inst_names:
3255       if verbose:
3256         feedback_fn("* Verifying instance %s" % instance)
3257       inst_config = self.my_inst_info[instance]
3258       self._VerifyInstance(instance, inst_config, node_image,
3259                            instdisk[instance])
3260       inst_nodes_offline = []
3261
3262       pnode = inst_config.primary_node
3263       pnode_img = node_image[pnode]
3264       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3265                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3266                " primary node failed", instance)
3267
3268       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3269                pnode_img.offline,
3270                constants.CV_EINSTANCEBADNODE, instance,
3271                "instance is marked as running and lives on offline node %s",
3272                inst_config.primary_node)
3273
3274       # If the instance is non-redundant we cannot survive losing its primary
3275       # node, so we are not N+1 compliant. On the other hand we have no disk
3276       # templates with more than one secondary so that situation is not well
3277       # supported either.
3278       # FIXME: does not support file-backed instances
3279       if not inst_config.secondary_nodes:
3280         i_non_redundant.append(instance)
3281
3282       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3283                constants.CV_EINSTANCELAYOUT,
3284                instance, "instance has multiple secondary nodes: %s",
3285                utils.CommaJoin(inst_config.secondary_nodes),
3286                code=self.ETYPE_WARNING)
3287
3288       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3289         pnode = inst_config.primary_node
3290         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3291         instance_groups = {}
3292
3293         for node in instance_nodes:
3294           instance_groups.setdefault(self.all_node_info[node].group,
3295                                      []).append(node)
3296
3297         pretty_list = [
3298           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3299           # Sort so that we always list the primary node first.
3300           for group, nodes in sorted(instance_groups.items(),
3301                                      key=lambda (_, nodes): pnode in nodes,
3302                                      reverse=True)]
3303
3304         self._ErrorIf(len(instance_groups) > 1,
3305                       constants.CV_EINSTANCESPLITGROUPS,
3306                       instance, "instance has primary and secondary nodes in"
3307                       " different groups: %s", utils.CommaJoin(pretty_list),
3308                       code=self.ETYPE_WARNING)
3309
3310       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3311         i_non_a_balanced.append(instance)
3312
3313       for snode in inst_config.secondary_nodes:
3314         s_img = node_image[snode]
3315         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3316                  snode, "instance %s, connection to secondary node failed",
3317                  instance)
3318
3319         if s_img.offline:
3320           inst_nodes_offline.append(snode)
3321
3322       # warn that the instance lives on offline nodes
3323       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3324                "instance has offline secondary node(s) %s",
3325                utils.CommaJoin(inst_nodes_offline))
3326       # ... or ghost/non-vm_capable nodes
3327       for node in inst_config.all_nodes:
3328         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3329                  instance, "instance lives on ghost node %s", node)
3330         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3331                  instance, "instance lives on non-vm_capable node %s", node)
3332
3333     feedback_fn("* Verifying orphan volumes")
3334     reserved = utils.FieldSet(*cluster.reserved_lvs)
3335
3336     # We will get spurious "unknown volume" warnings if any node of this group
3337     # is secondary for an instance whose primary is in another group. To avoid
3338     # them, we find these instances and add their volumes to node_vol_should.
3339     for inst in self.all_inst_info.values():
3340       for secondary in inst.secondary_nodes:
3341         if (secondary in self.my_node_info
3342             and inst.name not in self.my_inst_info):
3343           inst.MapLVsByNode(node_vol_should)
3344           break
3345
3346     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3347
3348     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3349       feedback_fn("* Verifying N+1 Memory redundancy")
3350       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3351
3352     feedback_fn("* Other Notes")
3353     if i_non_redundant:
3354       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3355                   % len(i_non_redundant))
3356
3357     if i_non_a_balanced:
3358       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3359                   % len(i_non_a_balanced))
3360
3361     if i_offline:
3362       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3363
3364     if n_offline:
3365       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3366
3367     if n_drained:
3368       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3369
3370     return not self.bad
3371
3372   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3373     """Analyze the post-hooks' result
3374
3375     This method analyses the hook result, handles it, and sends some
3376     nicely-formatted feedback back to the user.
3377
3378     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3379         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3380     @param hooks_results: the results of the multi-node hooks rpc call
3381     @param feedback_fn: function used send feedback back to the caller
3382     @param lu_result: previous Exec result
3383     @return: the new Exec result, based on the previous result
3384         and hook results
3385
3386     """
3387     # We only really run POST phase hooks, only for non-empty groups,
3388     # and are only interested in their results
3389     if not self.my_node_names:
3390       # empty node group
3391       pass
3392     elif phase == constants.HOOKS_PHASE_POST:
3393       # Used to change hooks' output to proper indentation
3394       feedback_fn("* Hooks Results")
3395       assert hooks_results, "invalid result from hooks"
3396
3397       for node_name in hooks_results:
3398         res = hooks_results[node_name]
3399         msg = res.fail_msg
3400         test = msg and not res.offline
3401         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3402                       "Communication failure in hooks execution: %s", msg)
3403         if res.offline or msg:
3404           # No need to investigate payload if node is offline or gave
3405           # an error.
3406           continue
3407         for script, hkr, output in res.payload:
3408           test = hkr == constants.HKR_FAIL
3409           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3410                         "Script %s failed, output:", script)
3411           if test:
3412             output = self._HOOKS_INDENT_RE.sub("      ", output)
3413             feedback_fn("%s" % output)
3414             lu_result = False
3415
3416     return lu_result
3417
3418
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  This LU performs no checks itself: L{Exec} returns a L{ResultWithJobs}
  holding one L{opcodes.OpGroupVerifyDisks} job for every node group lock
  it owns.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all node groups.

    The sharing mode of the locks is whatever L{_ShareAll} returns.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Build the per-group verification jobs.

    @param feedback_fn: feedback function; not used by this method
    @return: a L{ResultWithJobs} with one single-opcode job per owned
        node group lock

    """
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3437
3438
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  Instances, node groups and nodes are locked level by level in
  L{DeclareLocks}; the optimistic choices made there are re-checked in
  L{CheckPrereq} before L{Exec} queries the nodes for their logical
  volumes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and prepare empty lock lists.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    # The names to lock are computed later, one level at a time, in
    # DeclareLocks; every level gets its own empty list
    needed = {}
    for level in (locking.LEVEL_INSTANCE, locking.LEVEL_NODEGROUP,
                  locking.LEVEL_NODE):
      needed[level] = []
    self.needed_locks = needed

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances
      return

    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides the group being verified, lock all groups used by the
      # locked instances optimistically; this requires going via the node
      # before it's locked, requiring verification later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))
      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still right.

    The information about the locked instances is stored in
    C{self.instances}, indexed by instance name.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    for (name, instance) in self.instances.items():
      assert locked_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % name

      used_groups = _CheckInstanceNodeGroups(self.cfg, name, locked_groups)

      assert self.group_uuid in used_groups, \
        "Instance %s has no node in group %s" % (name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose admin state is L{constants.ADMINST_UP} are
    taken into account.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    up_instances = [inst for inst in self.instances.values()
                    if inst.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(up_instances)

    if not nv_dict:
      # No disks to look for, hence no node has to be contacted
      return ({}, [], {})

    node_errors = {}
    need_activation = set()
    missing = {}

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        node_errors[node] = msg
        continue

      for (lv_name, (_, _, lv_online)) in node_res.payload.items():
        # Every LV reported by the node is crossed off the expected ones
        owner = nv_dict.pop((node, lv_name), None)
        if owner is not None and not lv_online:
          need_activation.add(owner)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
    for (key, owner) in nv_dict.iteritems():
      missing.setdefault(owner, []).append(list(key))

    return (node_errors, list(need_activation), missing)
3556
3557
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The sizes recorded in the configuration are compared with the ones
  reported by the instances' primary nodes and corrected on mismatch.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance and node resource locks.

    With an explicit instance list only those instances are locked and
    the node resource locks are computed later in L{DeclareLocks};
    otherwise all locks of both levels are requested.

    """
    if not self.op.instances:
      self.wanted_names = None
      node_res_locks = locking.ALL_SET
      instance_locks = locking.ALL_SET
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      node_res_locks = []
      instance_locks = self.wanted_names
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.needed_locks = {
      locking.LEVEL_NODE_RES: node_res_locks,
      locking.LEVEL_INSTANCE: instance_locks,
      }
    # NOTE(review): presumably 1 means shared and 0 exclusive acquisition;
    # confirm against the locking code
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Compute the node resource locks for an explicit instance list.

    @param level: the locking level for which locks are being declared

    """
    if level != locking.LEVEL_NODE_RES:
      return
    if self.wanted_names is None:
      # All locks of this level were already requested in ExpandNames
      return
    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    infos = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(item) for item in infos]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of a child, at any depth along the chain of
        first children, had to be raised

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function, handed to the configuration
        update calls
    @return: a list of (instance name, disk index, size) tuples, one for
        every correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      # The primary node gets an entry even if the instance has no disks
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      node_disks.extend((instance, idx, disk)
                        for (idx, disk) in enumerate(instance.disks))

    assert not (frozenset(per_node_disks) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, dskl) in per_node_disks.items():
      # Work on copies, so that setting the disk ID for the RPC call does
      # not touch the objects held in the configuration
      probes = [entry[2].Copy() for entry in dskl]
      for probe in probes:
        self.cfg.SetDiskID(probe, node)

      result = self.rpc.call_blockdev_getsize(node, probes)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), reported) in zip(dskl, sizes):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # NOTE(review): the shift divides by 2**20, presumably converting
        # bytes into the mebibytes used by disk.size; confirm against
        # blockdev_getsize
        actual = reported >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3678
3679
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  The new name is resolved in L{CheckPrereq}; L{Exec} then takes the
  master IP down, updates the configuration and the known hosts file and
  brings the master IP up again with the new address.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a dict with the current cluster name as C{OP_TARGET} and the
        requested name as C{NEW_NAME}

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a tuple of two node lists, the first one holding only the
        master node, the second one being the cluster's node list

    """
    first_nodes = [self.cfg.GetMasterNode()]
    second_nodes = self.cfg.GetNodeList()
    return (first_nodes, second_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The resolved IP address is kept in C{self.ip} and C{self.op.name} is
    replaced by the resolved name.

    @raise errors.OpPrereqError: if neither name nor IP address change,
        or if a changed IP address answers on the node daemon port

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if new_name == old_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address which already answers is in use by something else
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: feedback function, handed to the configuration
        update call
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    shutdown = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
    shutdown.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and upload it to the online nodes,
      # except for the node named in the master parameters
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      targets = self.cfg.GetOnlineNodeList()
      if master_params.name in targets:
        targets.remove(master_params.name)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is brought up again with the new address even if one
      # of the steps above failed
      master_params.ip = new_ip
      startup = self.rpc.call_node_activate_master_ip(master_params.name,
                                                      master_params, ems)
      msg = startup.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return new_name
3763
3764
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class which
  L{netutils.IPAddress.GetClassFromIpFamily} returns for the cluster's
  primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error carries
      the L{errors.ECODE_INVAL} error code

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # The values are wrapped in one-element tuples so that formatting
    # cannot fail with a TypeError should a value itself be a tuple
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3784
3785
3786 class LUClusterSetParams(LogicalUnit):
3787   """Change the parameters of the cluster.
3788
3789   """
3790   HPATH = "cluster-modify"
3791   HTYPE = constants.HTYPE_CLUSTER
3792   REQ_BGL = False
3793
3794   def CheckArguments(self):
3795     """Check parameters
3796
3797     """
3798     if self.op.uid_pool:
3799       uidpool.CheckUidPool(self.op.uid_pool)
3800
3801     if self.op.add_uids:
3802       uidpool.CheckUidPool(self.op.add_uids)
3803
3804     if self.op.remove_uids:
3805       uidpool.CheckUidPool(self.op.remove_uids)
3806
3807     if self.op.master_netmask is not None:
3808       _ValidateNetmask(self.cfg, self.op.master_netmask)
3809
3810     if self.op.diskparams:
3811       for dt_params in self.op.diskparams.values():
3812         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3813
3814   def ExpandNames(self):
3815     # FIXME: in the future maybe other cluster params won't require checking on
3816     # all nodes to be modified.
3817     self.needed_locks = {
3818       locking.LEVEL_NODE: locking.ALL_SET,
3819       locking.LEVEL_INSTANCE: locking.ALL_SET,
3820       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3821     }
3822     self.share_locks = {
3823         locking.LEVEL_NODE: 1,
3824         locking.LEVEL_INSTANCE: 1,
3825         locking.LEVEL_NODEGROUP: 1,
3826     }
3827
3828   def BuildHooksEnv(self):
3829     """Build hooks env.
3830
3831     """
3832     return {
3833       "OP_TARGET": self.cfg.GetClusterName(),
3834       "NEW_VG_NAME": self.op.vg_name,
3835       }
3836
3837   def BuildHooksNodes(self):
3838     """Build hooks nodes.
3839
3840     """
3841     mn = self.cfg.GetMasterNode()
3842     return ([mn], [mn])
3843
3844   def CheckPrereq(self):
3845     """Check prerequisites.
3846
3847     This checks whether the given params don't conflict and
3848     if the given volume group is valid.
3849
3850     """
3851     if self.op.vg_name is not None and not self.op.vg_name:
3852       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3853         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3854                                    " instances exist", errors.ECODE_INVAL)
3855
3856     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3857       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3858         raise errors.OpPrereqError("Cannot disable drbd helper while"
3859                                    " drbd-based instances exist",
3860                                    errors.ECODE_INVAL)
3861
3862     node_list = self.owned_locks(locking.LEVEL_NODE)
3863
3864     # if vg_name not None, checks given volume group on all nodes
3865     if self.op.vg_name:
3866       vglist = self.rpc.call_vg_list(node_list)
3867       for node in node_list:
3868         msg = vglist[node].fail_msg
3869         if msg:
3870           # ignoring down node
3871           self.LogWarning("Error while gathering data on node %s"
3872                           " (ignoring node): %s", node, msg)
3873           continue
3874         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3875                                               self.op.vg_name,
3876                                               constants.MIN_VG_SIZE)
3877         if vgstatus:
3878           raise errors.OpPrereqError("Error on node '%s': %s" %
3879                                      (node, vgstatus), errors.ECODE_ENVIRON)
3880
3881     if self.op.drbd_helper:
3882       # checks given drbd helper on all nodes
3883       helpers = self.rpc.call_drbd_helper(node_list)
3884       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3885         if ninfo.offline:
3886           self.LogInfo("Not checking drbd helper on offline node %s", node)
3887           continue
3888         msg = helpers[node].fail_msg
3889         if msg:
3890           raise errors.OpPrereqError("Error checking drbd helper on node"
3891                                      " '%s': %s" % (node, msg),
3892                                      errors.ECODE_ENVIRON)
3893         node_helper = helpers[node].payload
3894         if node_helper != self.op.drbd_helper:
3895           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3896                                      (node, node_helper), errors.ECODE_ENVIRON)
3897
3898     self.cluster = cluster = self.cfg.GetClusterInfo()
3899     # validate params changes
3900     if self.op.beparams:
3901       objects.UpgradeBeParams(self.op.beparams)
3902       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3903       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3904
3905     if self.op.ndparams:
3906       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3907       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3908
3909       # TODO: we need a more general way to handle resetting
3910       # cluster-level parameters to default values
3911       if self.new_ndparams["oob_program"] == "":
3912         self.new_ndparams["oob_program"] = \
3913             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3914
3915     if self.op.hv_state:
3916       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3917                                             self.cluster.hv_state_static)
3918       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3919                                for hv, values in new_hv_state.items())
3920
3921     if self.op.disk_state:
3922       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3923                                                 self.cluster.disk_state_static)
3924       self.new_disk_state = \
3925         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3926                             for name, values in svalues.items()))
3927              for storage, svalues in new_disk_state.items())
3928
3929     if self.op.ipolicy:
3930       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3931                                             group_policy=False)
3932
3933       all_instances = self.cfg.GetAllInstancesInfo().values()
3934       violations = set()
3935       for group in self.cfg.GetAllNodeGroupsInfo().values():
3936         instances = frozenset([inst for inst in all_instances
3937                                if compat.any(node in group.members
3938                                              for node in inst.all_nodes)])
3939         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3940         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3941                                                                    group),
3942                                             new_ipolicy, instances)
3943         if new:
3944           violations.update(new)
3945
3946       if violations:
3947         self.LogWarning("After the ipolicy change the following instances"
3948                         " violate them: %s",
3949                         utils.CommaJoin(violations))
3950
3951     if self.op.nicparams:
3952       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3953       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3954       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3955       nic_errors = []
3956
3957       # check all instances for consistency
3958       for instance in self.cfg.GetAllInstancesInfo().values():
3959         for nic_idx, nic in enumerate(instance.nics):
3960           params_copy = copy.deepcopy(nic.nicparams)
3961           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3962
3963           # check parameter syntax
3964           try:
3965             objects.NIC.CheckParameterSyntax(params_filled)
3966           except errors.ConfigurationError, err:
3967             nic_errors.append("Instance %s, nic/%d: %s" %
3968                               (instance.name, nic_idx, err))
3969
3970           # if we're moving instances to routed, check that they have an ip
3971           target_mode = params_filled[constants.NIC_MODE]
3972           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3973             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3974                               " address" % (instance.name, nic_idx))
3975       if nic_errors:
3976         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3977                                    "\n".join(nic_errors))
3978
3979     # hypervisor list/parameters
3980     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3981     if self.op.hvparams:
3982       for hv_name, hv_dict in self.op.hvparams.items():
3983         if hv_name not in self.new_hvparams:
3984           self.new_hvparams[hv_name] = hv_dict
3985         else:
3986           self.new_hvparams[hv_name].update(hv_dict)
3987
3988     # disk template parameters
3989     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3990     if self.op.diskparams:
3991       for dt_name, dt_params in self.op.diskparams.items():
3992         if dt_name not in self.op.diskparams:
3993           self.new_diskparams[dt_name] = dt_params
3994         else:
3995           self.new_diskparams[dt_name].update(dt_params)
3996
3997     # os hypervisor parameters
3998     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3999     if self.op.os_hvp:
4000       for os_name, hvs in self.op.os_hvp.items():
4001         if os_name not in self.new_os_hvp:
4002           self.new_os_hvp[os_name] = hvs
4003         else:
4004           for hv_name, hv_dict in hvs.items():
4005             if hv_name not in self.new_os_hvp[os_name]:
4006               self.new_os_hvp[os_name][hv_name] = hv_dict
4007             else:
4008               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4009
4010     # os parameters
4011     self.new_osp = objects.FillDict(cluster.osparams, {})
4012     if self.op.osparams:
4013       for os_name, osp in self.op.osparams.items():
4014         if os_name not in self.new_osp:
4015           self.new_osp[os_name] = {}
4016
4017         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4018                                                   use_none=True)
4019
4020         if not self.new_osp[os_name]:
4021           # we removed all parameters
4022           del self.new_osp[os_name]
4023         else:
4024           # check the parameter validity (remote check)
4025           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4026                          os_name, self.new_osp[os_name])
4027
4028     # changes to the hypervisor list
4029     if self.op.enabled_hypervisors is not None:
4030       self.hv_list = self.op.enabled_hypervisors
4031       for hv in self.hv_list:
4032         # if the hypervisor doesn't already exist in the cluster
4033         # hvparams, we initialize it to empty, and then (in both
4034         # cases) we make sure to fill the defaults, as we might not
4035         # have a complete defaults list if the hypervisor wasn't
4036         # enabled before
4037         if hv not in new_hvp:
4038           new_hvp[hv] = {}
4039         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4040         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4041     else:
4042       self.hv_list = cluster.enabled_hypervisors
4043
4044     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4045       # either the enabled list has changed, or the parameters have, validate
4046       for hv_name, hv_params in self.new_hvparams.items():
4047         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4048             (self.op.enabled_hypervisors and
4049              hv_name in self.op.enabled_hypervisors)):
4050           # either this is a new hypervisor, or its parameters have changed
4051           hv_class = hypervisor.GetHypervisor(hv_name)
4052           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4053           hv_class.CheckParameterSyntax(hv_params)
4054           _CheckHVParams(self, node_list, hv_name, hv_params)
4055
4056     if self.op.os_hvp:
4057       # no need to check any newly-enabled hypervisors, since the
4058       # defaults have already been checked in the above code-block
4059       for os_name, os_hvp in self.new_os_hvp.items():
4060         for hv_name, hv_params in os_hvp.items():
4061           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4062           # we need to fill in the new os_hvp on top of the actual hv_p
4063           cluster_defaults = self.new_hvparams.get(hv_name, {})
4064           new_osp = objects.FillDict(cluster_defaults, hv_params)
4065           hv_class = hypervisor.GetHypervisor(hv_name)
4066           hv_class.CheckParameterSyntax(new_osp)
4067           _CheckHVParams(self, node_list, hv_name, new_osp)
4068
4069     if self.op.default_iallocator:
4070       alloc_script = utils.FindFile(self.op.default_iallocator,
4071                                     constants.IALLOCATOR_SEARCH_PATH,
4072                                     os.path.isfile)
4073       if alloc_script is None:
4074         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4075                                    " specified" % self.op.default_iallocator,
4076                                    errors.ECODE_INVAL)
4077
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies the values requested in the opcode (C{self.op}) to the
    configuration and to the cluster object (C{self.cluster}), using the
    C{self.new_*} values which were prepared before this method runs.
    The cluster object is saved with C{self.cfg.Update} near the end; if
    the master network device is changed, the master IP is shut down
    before the save and started again afterwards.

    @param feedback_fn: callable used for reporting progress and
        non-fatal problems to the caller

    """
    # Volume group name: an empty value is stored as None
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    # DRBD usermode helper: same convention, an empty value is stored as None
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    # Parameter dictionaries: copy the prepared values onto the cluster
    # object for every group of parameters given in the opcode
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # hvparams are assigned here as well, so they are stored even when
      # only the list of enabled hypervisors was given
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      # Enabling the flag without confd only produces a note, it is not
      # refused
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    # UID pool: additions and removals are applied first; a full
    # replacement (uid_pool), if given as well, overrides their result
    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      # Applies a list of (action, OS name) modifications in place to the
      # list stored in the cluster attribute "aname"; "desc" is only used
      # in messages. Redundant additions/removals are reported through
      # feedback_fn and otherwise ignored.
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # The master IP is taken down on the old device before the
      # configuration changes; a failure here aborts the operation
      # (result.Raise)
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      # A failed RPC is only reported; the new netmask is still recorded
      # in the cluster object below
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    # Save all changes made to the cluster object above
    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # Parameters are read again after the save and used to bring the
      # master IP back up; a failure here is only a warning, since the
      # configuration has already been changed
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4218
4219
def _UploadHelper(lu, nodes, fname):
  """Upload a file to some nodes, warning about each node that failed.

  A file which does not exist locally is skipped without any message.

  @param lu: calling logical unit; its C{rpc} and C{proc} attributes
      are used for the upload and for logging
  @param nodes: the nodes handed to the upload RPC
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for target, node_result in upload_results.items():
    failure = node_result.fail_msg
    if failure:
      lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                         (fname, target, failure))
4232
4233
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} flag and its
      C{enabled_hypervisors} list are consulted
  @type redist: boolean
  @param redist: if true, compute the lists used for redistribution:
      only the RAPI certificate is listed instead of all certificates
      and the ssconf files, and no master candidate files are listed
  @rtype: tuple
  @return: four sets of file names, C{(files_all, files_opt, files_mc,
      files_vm)}: files for all nodes, optional files, files only for
      master candidates and files only for VM-capable nodes

  """
  # Files needed on every node
  everywhere = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if redist:
    # at the very least the RAPI certificate has to be shipped
    everywhere.add(constants.RAPI_CERT_FILE)
  else:
    everywhere.update(constants.ALL_CERT_FILES)
    everywhere.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    everywhere.add(constants.ETC_HOSTS)

  # Optional files; each of them must
  # - also be listed in one of the other categories
  # - either exist or not exist on all nodes of that category
  optional = set([constants.RAPI_USERS_FILE])

  # Files belonging on master candidates only
  mc_only = set()
  if not redist:
    mc_only.add(constants.CLUSTER_CONF_FILE)
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    mc_only.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files belonging on VM-capable nodes only: the first element of what
  # each enabled hypervisor reports; the second element lists those of
  # its files which are optional
  vm_only = set()
  for hv_name in cluster.enabled_hypervisors:
    vm_only.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  for hv_name in cluster.enabled_hypervisors:
    optional.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # No file name may show up in more than one category
  combined = everywhere | mc_only | vm_only
  assert len(combined) == len(everywhere) + len(mc_only) + len(vm_only), \
         "Found file listed in more than one file list"

  # Every optional file has to be listed in a required category too
  assert combined.issuperset(optional), \
         "Optional file not in a different required list"

  return (everywhere, optional, mc_only, vm_only)
4298
4299
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the nodes which need them.

  The configuration and ssconf files are distributed by ConfigWriter;
  this function handles the remaining files which have to be present
  on the nodes. The master node itself is never used as a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cluster = lu.cfg.GetClusterInfo()
  master = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  # Collect the targets: online nodes get the common files, VM-capable
  # nodes get the hypervisor files
  targets_all = lu.cfg.GetOnlineNodeList()
  targets_vm = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    targets_all.extend(additional_nodes)
    if additional_vm:
      targets_vm.extend(additional_nodes)

  # The master node is the source, so it is dropped from the targets
  for targets in (targets_all, targets_vm):
    if master.name in targets:
      targets.remove(master.name)

  # File lists for the redistribution case
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # The configuration file must never be sent out from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload every file to the nodes of its category
  for (node_list, files) in ((targets_all, files_all), (targets_vm, files_vm)):
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4348
4349
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU saves the unchanged cluster object again and then copies the
  ancillary files to the nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    node_level = locking.LEVEL_NODE
    self.share_locks[node_level] = 1
    self.needed_locks = {
      node_level: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: callable passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4370
4371
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The master network parameters and the external-script setting are
    read from the configuration and passed to the activation RPC, which
    is sent to the node named in those parameters. A failed RPC is
    turned into an error through the result's C{Raise} method.

    """
    params = self.cfg.GetMasterNetworkParameters()
    use_external_script = self.cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_activate_master_ip(params.name, params,
                                                       use_external_script)
    rpc_result.Raise("Could not activate the master IP")
4385
4386
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The master network parameters and the external-script setting are
    read from the configuration and passed to the deactivation RPC,
    which is sent to the node named in those parameters. A failed RPC
    is turned into an error through the result's C{Raise} method.

    """
    params = self.cfg.GetMasterNetworkParameters()
    use_external_script = self.cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_deactivate_master_ip(params.name, params,
                                                         use_external_script)
    rpc_result.Raise("Could not deactivate the master IP")
4400
4401
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node until no disk reports a sync percentage anymore (or only once,
  if C{oneshot} is set).

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param instance: the instance whose disks are checked
  @param disks: the disks to check; passed through
      L{_ExpandCheckDisks}, an explicitly empty value means there is
      nothing to wait for
  @param oneshot: if true, do not wait for the sync to finish; only the
      short retries for degraded disks are still done
  @rtype: boolean
  @return: True if there was nothing to check, otherwise whether no
      checked disk was found degraded (without being in sync progress)
      during the last poll
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  # Nothing to do for diskless instances or for an explicitly empty
  # disk list
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # Number of consecutive RPC failures; reset after each successful query
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    # Per-poll state, recomputed from scratch on every iteration
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    # The payload is indexed like "disks": entry i describes disks[i]
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # A degraded disk only counts if it is not currently syncing
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      # A sync percentage means this disk is still syncing
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # NOTE(review): despite the name this keeps the estimate of the
          # last disk reporting one, not the maximum over all disks
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # Wait for the estimated time before polling again, but never more
    # than a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
4475
4476
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The device is looked up on the node only if C{on_primary} is set or
  the device says it is assembled on secondaries; its children are then
  checked recursively as long as no problem was found.

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param dev: the disk to check
  @param node: the node on which the disk is checked
  @param on_primary: whether the node is the primary one for the disk
  @param ldisk: if True, test the local storage status (C{ldisk_status})
      of the device instead of its C{is_degraded} attribute (which
      represents overall non-ok status for the device(s)); this is not
      passed on to the checks of the children
  @return: a true value if no problem was found

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    failure = found.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # a problem was already found, the remaining children are not
        # queried
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
4509
4510
4511 class LUOobCommand(NoHooksLU):
4512   """Logical unit for OOB handling.
4513
4514   """
4515   REG_BGL = False
4516   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4517
4518   def ExpandNames(self):
4519     """Gather locks we need.
4520
4521     """
4522     if self.op.node_names:
4523       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4524       lock_names = self.op.node_names
4525     else:
4526       lock_names = locking.ALL_SET
4527
4528     self.needed_locks = {
4529       locking.LEVEL_NODE: lock_names,
4530       }
4531
4532   def CheckPrereq(self):
4533     """Check prerequisites.
4534
4535     This checks:
4536      - the node exists in the configuration
4537      - OOB is supported
4538
4539     Any errors are signaled by raising errors.OpPrereqError.
4540
4541     """
4542     self.nodes = []
4543     self.master_node = self.cfg.GetMasterNode()
4544
4545     assert self.op.power_delay >= 0.0
4546
4547     if self.op.node_names:
4548       if (self.op.command in self._SKIP_MASTER and
4549           self.master_node in self.op.node_names):
4550         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4551         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4552
4553         if master_oob_handler:
4554           additional_text = ("run '%s %s %s' if you want to operate on the"
4555                              " master regardless") % (master_oob_handler,
4556                                                       self.op.command,
4557                                                       self.master_node)
4558         else:
4559           additional_text = "it does not support out-of-band operations"
4560
4561         raise errors.OpPrereqError(("Operating on the master node %s is not"
4562                                     " allowed for %s; %s") %
4563                                    (self.master_node, self.op.command,
4564                                     additional_text), errors.ECODE_INVAL)
4565     else:
4566       self.op.node_names = self.cfg.GetNodeList()
4567       if self.op.command in self._SKIP_MASTER:
4568         self.op.node_names.remove(self.master_node)
4569
4570     if self.op.command in self._SKIP_MASTER:
4571       assert self.master_node not in self.op.node_names
4572
4573     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4574       if node is None:
4575         raise errors.OpPrereqError("Node %s not found" % node_name,
4576                                    errors.ECODE_NOENT)
4577       else:
4578         self.nodes.append(node)
4579
4580       if (not self.op.ignore_status and
4581           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4582         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4583                                     " not marked offline") % node_name,
4584                                    errors.ECODE_STATE)
4585
4586   def Exec(self, feedback_fn):
4587     """Execute OOB and return result if we expect any.
4588
4589     """
4590     master_node = self.master_node
4591     ret = []
4592
4593     for idx, node in enumerate(utils.NiceSort(self.nodes,
4594                                               key=lambda node: node.name)):
4595       node_entry = [(constants.RS_NORMAL, node.name)]
4596       ret.append(node_entry)
4597
4598       oob_program = _SupportsOob(self.cfg, node)
4599
4600       if not oob_program:
4601         node_entry.append((constants.RS_UNAVAIL, None))
4602         continue
4603
4604       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4605                    self.op.command, oob_program, node.name)
4606       result = self.rpc.call_run_oob(master_node, oob_program,
4607                                      self.op.command, node.name,
4608                                      self.op.timeout)
4609
4610       if result.fail_msg:
4611         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4612                         node.name, result.fail_msg)
4613         node_entry.append((constants.RS_NODATA, None))
4614       else:
4615         try:
4616           self._CheckPayload(result)
4617         except errors.OpExecError, err:
4618           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4619                           node.name, err)
4620           node_entry.append((constants.RS_NODATA, None))
4621         else:
4622           if self.op.command == constants.OOB_HEALTH:
4623             # For health we should log important events
4624             for item, status in result.payload:
4625               if status in [constants.OOB_STATUS_WARNING,
4626                             constants.OOB_STATUS_CRITICAL]:
4627                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4628                                 item, node.name, status)
4629
4630           if self.op.command == constants.OOB_POWER_ON:
4631             node.powered = True
4632           elif self.op.command == constants.OOB_POWER_OFF:
4633             node.powered = False
4634           elif self.op.command == constants.OOB_POWER_STATUS:
4635             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4636             if powered != node.powered:
4637               logging.warning(("Recorded power state (%s) of node '%s' does not"
4638                                " match actual power state (%s)"), node.powered,
4639                               node.name, powered)
4640
4641           # For configuration changing commands we should update the node
4642           if self.op.command in (constants.OOB_POWER_ON,
4643                                  constants.OOB_POWER_OFF):
4644             self.cfg.Update(node, feedback_fn)
4645
4646           node_entry.append((constants.RS_NORMAL, result.payload))
4647
4648           if (self.op.command == constants.OOB_POWER_ON and
4649               idx < len(self.nodes) - 1):
4650             time.sleep(self.op.power_delay)
4651
4652     return ret
4653
4654   def _CheckPayload(self, result):
4655     """Checks if the payload is valid.
4656
4657     @param result: RPC result
4658     @raises errors.OpExecError: If payload is not valid
4659
4660     """
4661     errs = []
4662     if self.op.command == constants.OOB_HEALTH:
4663       if not isinstance(result.payload, list):
4664         errs.append("command 'health' is expected to return a list but got %s" %
4665                     type(result.payload))
4666       else:
4667         for item, status in result.payload:
4668           if status not in constants.OOB_STATUSES:
4669             errs.append("health item '%s' has invalid status '%s'" %
4670                         (item, status))
4671
4672     if self.op.command == constants.OOB_POWER_STATUS:
4673       if not isinstance(result.payload, dict):
4674         errs.append("power-status is expected to return a dict but got %s" %
4675                     type(result.payload))
4676
4677     if self.op.command in [
4678         constants.OOB_POWER_ON,
4679         constants.OOB_POWER_OFF,
4680         constants.OOB_POWER_CYCLE,
4681         ]:
4682       if result.payload is not None:
4683         errs.append("%s is expected to not return payload but got '%s'" %
4684                     (self.op.command, result.payload))
4685
4686     if errs:
4687       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4688                                utils.CommaJoin(errs))
4689
4690
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declare (no) locks and compute the wanted names.

    @param lu: the logical unit on whose behalf the query runs

    """
    # All nodes used to be locked in shared mode here; the locks were
    # removed temporarily and should come back later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Nothing to declare, as this query doesn't use locks.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # Only the nodes whose RPC did not fail (at RPC level) get an entry
    # for each OS, so that a node with a non-responding node daemon
    # doesn't make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]

    all_os = {}
    for node_name, node_result in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in node_result.payload:
        if name not in all_os:
          # first time this OS is seen: start with an empty list for
          # every good node
          all_os[name] = dict((nname, []) for nname in good_nodes)
        # the parameters arrive as [name, help] and are stored as
        # (name, help)
        entry = (path, status, diagnose, variants,
                 [tuple(v) for v in params], api_versions)
        all_os[name][node_name].append(entry)
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    All nodes which are VM-capable and not offline are asked for their
    OS definitions. An OS is valid only if the first entry of every such
    node is valid; for valid OSes the variants, parameters and API
    versions are the values common to all nodes.

    @param lu: the logical unit on whose behalf the query runs
    @rtype: list
    @return: C{query.OsInfo} objects in the order given by
        C{self._GetNames}

    """
    # Locking is not used
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    diagnose_result = lu.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(diagnose_result)
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      first_node = True
      for node_entries in os_data.values():
        # only the first entry of each node is looked at; a node with
        # no entry or an invalid first entry makes the OS invalid
        info.valid = bool(info.valid and node_entries and node_entries[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = node_entries[0][3:6]
        if first_node:
          # the first node provides the starting values
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
          first_node = False
        else:
          # later nodes can only remove values they don't agree on
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
4806
4807
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined through
        C{qlang.OP_AND}, or C{None} if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for flag in ["hidden", "blacklisted"]:
      if flag not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, flag]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter is None:
      return name_filter
    return [qlang.OP_AND, name_filter, status_filter]

  def CheckArguments(self):
    """Creates the OS query object; locking is never used.

    """
    qfilter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(qfilter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates name expansion to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4850
4851
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    Both variables carry the name of the node being removed.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove. The same list, without the
    target node, is returned for both phases.

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The candidate pool is adjusted and the node removed from the context
    first; afterwards the post hook is run on the node, the node is told
    to leave the cluster and, if the cluster manages /etc/hosts, its
    entry is dropped there.

    """
    target = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 target.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[target.name])
    self.context.RemoveNode(target.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, target.name)

    leave_result = self.rpc.call_node_leave_cluster(target.name,
                                                    modify_ssh_setup)
    leave_error = leave_result.fail_msg
    if leave_error:
      # A failure on the remote side is only reported, not fatal
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_error)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    master = self.cfg.GetMasterNode()
    hosts_result = self.rpc.call_etc_hosts_modify(master,
                                                  constants.ETC_HOSTS_REMOVE,
                                                  target.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4944
4945
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Declares the node locks needed by the query, if any.

    Node locks are only requested when locking was asked for and live
    data (C{query.NQ_LIVE}) is among the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)
    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; no locks are declared at this stage.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    the others are passed on as C{None} (or an empty dict for groups).

    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, retrieved via RPC
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = []
      for name in nodenames:
        if all_info[name].vm_capable:
          toquery_nodes.append(name)

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # failed calls and empty payloads are left out
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    # Instances per node, split by primary and secondary role
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # Out-of-band support, computed for all configured nodes
    oob_support = None
    if query.NQ_OOB in self.requested_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_objects = [all_info[name] for name in nodenames]
    master = lu.cfg.GetMasterNode()
    cluster = lu.cfg.GetClusterInfo()

    return query.NodeQueryData(node_objects, live_data, master,
                               node_to_primary, node_to_secondary, groups,
                               oob_support, cluster)
5020
5021
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5041
5042
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: one list per volume, holding the stringified values of the
        requested output fields

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    def _OwningInstance(node, vol):
      """Returns the mapped value for the volume, "-" if there is none.

      """
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

    # One value extractor per supported output field
    extractors = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": _OwningInstance,
      }

    rows = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        failure)
        continue

      # Volumes are listed ordered by their physical device
      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          extract = extractors.get(field)
          if extract is None:
            raise errors.ParameterError(field)
          row.append(str(extract(node, vol)))

        rows.append(row)

    return rows
5111
5112
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: one list per storage unit with the values of the requested
        output fields, ordered by node and then by storage unit name

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    requested = self.op.output_fields
    if constants.SF_NAME in requested:
      candidates = requested
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in candidates if name not in lu_only]

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    def _Cell(node, row, field):
      """Returns the value of one output field for one storage unit.

      """
      if field == constants.SF_NODE:
        return node
      if field == constants.SF_TYPE:
        return self.op.storage_type
      if field in field_idx:
        return row[field_idx[field]]
      raise errors.ParameterError(field)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        failure)
        continue

      # Index the returned rows by storage unit name for sorted output
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        result.append([_Cell(node, row, field)
                       for field in self.op.output_fields])

    return result
5194
5195
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Declares the locks needed by the query, if any.

    Locks are only requested when locking was asked for and live data
    (C{query.IQ_LIVE}) is among the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Node group and node locks are filled in later, in DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Group locks are only taken if node data is requested as well
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Computes the node group and node locks for the owned instances.

    Nothing is done unless locking is in use.

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the owned group locks against the owned instances.

    This is the verification step for the optimistic group locking done
    in L{DeclareLocks}.

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    the others are passed on as C{None} (or an empty dict for live data).

    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # Names of all nodes used by the selected instances
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # just the entry for "inst", so data of other instances
                # reported by this node (including wrong-node or orphan
                # ones) ends up in live_data as well -- confirm intended
                live_data.update(result.payload)
              else:
                # Instance was reported by a node other than its primary
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      # Disk usage is computed from the configured disk sizes only
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" holds node objects by name, not a set of names
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5326
5327
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by C{self.op.what}; all the work is delegated to the
  query implementation returned for it.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    implementation = _GetQueryImplementation(self.op.what)

    self.impl = implementation(self.op.qfilter, self.op.fields,
                               self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5348
5349
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the field query result for the implementation's fields.

    """
    available = self.qcls.FIELDS
    return query.QueryFields(available, self.op.fields)
5365
5366
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type cannot be modified at
        all or if any of the given fields is not modifiable for it

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # Any requested field outside the modifiable ones is an error
    unsupported = set(self.op.changes.keys()) - modifiable
    if unsupported:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(unsupported)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit through an RPC call to its node.

    """
    node_name = self.op.node_name
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
5407
5408
5409 class LUNodeAdd(LogicalUnit):
5410   """Logical unit for adding node to the cluster.
5411
5412   """
5413   HPATH = "node-add"
5414   HTYPE = constants.HTYPE_NODE
5415   _NFLAGS = ["master_capable", "vm_capable"]
5416
5417   def CheckArguments(self):
5418     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5419     # validate/normalize the node name
5420     self.hostname = netutils.GetHostname(name=self.op.node_name,
5421                                          family=self.primary_ip_family)
5422     self.op.node_name = self.hostname.name
5423
5424     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5425       raise errors.OpPrereqError("Cannot readd the master node",
5426                                  errors.ECODE_STATE)
5427
5428     if self.op.readd and self.op.group:
5429       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5430                                  " being readded", errors.ECODE_INVAL)
5431
5432   def BuildHooksEnv(self):
5433     """Build hooks env.
5434
5435     This will run on all nodes before, and on all nodes + the new node after.
5436
5437     """
5438     return {
5439       "OP_TARGET": self.op.node_name,
5440       "NODE_NAME": self.op.node_name,
5441       "NODE_PIP": self.op.primary_ip,
5442       "NODE_SIP": self.op.secondary_ip,
5443       "MASTER_CAPABLE": str(self.op.master_capable),
5444       "VM_CAPABLE": str(self.op.vm_capable),
5445       }
5446
5447   def BuildHooksNodes(self):
5448     """Build hooks nodes.
5449
5450     """
5451     # Exclude added node
5452     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5453     post_nodes = pre_nodes + [self.op.node_name, ]
5454
5455     return (pre_nodes, post_nodes)
5456
5457   def CheckPrereq(self):
5458     """Check prerequisites.
5459
5460     This checks:
5461      - the new node is not already in the config
5462      - it is resolvable
5463      - its parameters (single/dual homed) matches the cluster
5464
5465     Any errors are signaled by raising errors.OpPrereqError.
5466
5467     """
5468     cfg = self.cfg
5469     hostname = self.hostname
5470     node = hostname.name
5471     primary_ip = self.op.primary_ip = hostname.ip
5472     if self.op.secondary_ip is None:
5473       if self.primary_ip_family == netutils.IP6Address.family:
5474         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5475                                    " IPv4 address must be given as secondary",
5476                                    errors.ECODE_INVAL)
5477       self.op.secondary_ip = primary_ip
5478
5479     secondary_ip = self.op.secondary_ip
5480     if not netutils.IP4Address.IsValid(secondary_ip):
5481       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5482                                  " address" % secondary_ip, errors.ECODE_INVAL)
5483
5484     node_list = cfg.GetNodeList()
5485     if not self.op.readd and node in node_list:
5486       raise errors.OpPrereqError("Node %s is already in the configuration" %
5487                                  node, errors.ECODE_EXISTS)
5488     elif self.op.readd and node not in node_list:
5489       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5490                                  errors.ECODE_NOENT)
5491
5492     self.changed_primary_ip = False
5493
5494     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5495       if self.op.readd and node == existing_node_name:
5496         if existing_node.secondary_ip != secondary_ip:
5497           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5498                                      " address configuration as before",
5499                                      errors.ECODE_INVAL)
5500         if existing_node.primary_ip != primary_ip:
5501           self.changed_primary_ip = True
5502
5503         continue
5504
5505       if (existing_node.primary_ip == primary_ip or
5506           existing_node.secondary_ip == primary_ip or
5507           existing_node.primary_ip == secondary_ip or
5508           existing_node.secondary_ip == secondary_ip):
5509         raise errors.OpPrereqError("New node ip address(es) conflict with"
5510                                    " existing node %s" % existing_node.name,
5511                                    errors.ECODE_NOTUNIQUE)
5512
5513     # After this 'if' block, None is no longer a valid value for the
5514     # _capable op attributes
5515     if self.op.readd:
5516       old_node = self.cfg.GetNodeInfo(node)
5517       assert old_node is not None, "Can't retrieve locked node %s" % node
5518       for attr in self._NFLAGS:
5519         if getattr(self.op, attr) is None:
5520           setattr(self.op, attr, getattr(old_node, attr))
5521     else:
5522       for attr in self._NFLAGS:
5523         if getattr(self.op, attr) is None:
5524           setattr(self.op, attr, True)
5525
5526     if self.op.readd and not self.op.vm_capable:
5527       pri, sec = cfg.GetNodeInstances(node)
5528       if pri or sec:
5529         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5530                                    " flag set to false, but it already holds"
5531                                    " instances" % node,
5532                                    errors.ECODE_STATE)
5533
5534     # check that the type of the node (single versus dual homed) is the
5535     # same as for the master
5536     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5537     master_singlehomed = myself.secondary_ip == myself.primary_ip
5538     newbie_singlehomed = secondary_ip == primary_ip
5539     if master_singlehomed != newbie_singlehomed:
5540       if master_singlehomed:
5541         raise errors.OpPrereqError("The master has no secondary ip but the"
5542                                    " new node has one",
5543                                    errors.ECODE_INVAL)
5544       else:
5545         raise errors.OpPrereqError("The master has a secondary ip but the"
5546                                    " new node doesn't have one",
5547                                    errors.ECODE_INVAL)
5548
5549     # checks reachability
5550     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5551       raise errors.OpPrereqError("Node not reachable by ping",
5552                                  errors.ECODE_ENVIRON)
5553
5554     if not newbie_singlehomed:
5555       # check reachability from my secondary ip to newbie's secondary ip
5556       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5557                            source=myself.secondary_ip):
5558         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5559                                    " based ping to node daemon port",
5560                                    errors.ECODE_ENVIRON)
5561
5562     if self.op.readd:
5563       exceptions = [node]
5564     else:
5565       exceptions = []
5566
5567     if self.op.master_capable:
5568       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5569     else:
5570       self.master_candidate = False
5571
5572     if self.op.readd:
5573       self.new_node = old_node
5574     else:
5575       node_group = cfg.LookupNodeGroup(self.op.group)
5576       self.new_node = objects.Node(name=node,
5577                                    primary_ip=primary_ip,
5578                                    secondary_ip=secondary_ip,
5579                                    master_candidate=self.master_candidate,
5580                                    offline=False, drained=False,
5581                                    group=node_group)
5582
5583     if self.op.ndparams:
5584       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5585
5586     if self.op.hv_state:
5587       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5588
5589     if self.op.disk_state:
5590       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5591
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object prepared earlier (C{self.new_node}) is finalized,
    the node daemon's protocol version is compared with ours, the hosts
    file is updated through the master (if the cluster is configured to
    do so), the secondary IP is checked when it differs from the primary
    one, and the master verifies the node via the C{NV_NODELIST} check.
    Only then is the node added (or re-added) to the cluster context.

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: if the protocol versions differ or the
        ssh/hostname verification reports failures; failed RPC calls are
        additionally reported through C{result.Raise}

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # node parameters are always (re)set: an empty dict is stored when the
    # opcode does not provide any
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # the static hypervisor/disk state is only overwritten when given
    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # only dual-homed nodes need the secondary IP check
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # the master is the only verifier; it is asked to check the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # a non-empty payload maps each failed node to its error message
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          # a failed demotion is only reported, it does not abort the re-add
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5694
5695
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  # At most one of the three flags may be set; any other combination is
  # not a valid role and hence has no entry here
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  # Same order as the tuples used as keys of _F2R
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode parameters and decides what needs locking.

    Besides expanding the node name, this computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}, which are used by the
    later phases.

    @raise errors.OpPrereqError: if no modification was requested, more
        than one modification is set to C{True} or the secondary IP is
        not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE: this counts True over all modifications (including
    # master_capable and vm_capable), not just over the three role flags
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a demotion may have to be compensated by
    # promoting other nodes; instances only if the secondary IP changes
    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @param instance: the instance object to check
    @return: whether the instance uses an internally mirrored disk
        template and has the modified node among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declares the node, node resource and instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: dictionary with the target node and the string form of the
        requested flag values (C{"None"} for flags that were not passed)

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists, both made of the master node and
        the modified node

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies the instance locks (for secondary IP changes), checks
    that the requested flag changes are allowed, computes the old and new
    node role (C{self.old_role}, C{self.new_role}) and prepares the new
    node parameters and static hypervisor/disk state.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      # affected_instances is never None here: lock_instances is set
      # whenever a secondary IP was passed
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: function used to send feedback back to the caller
    @return: list of C{(name, new_value)} tuples describing the changed
        capability flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # report only the flags which actually differ
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6031
6032
class LUNodePowercycle(NoHooksLU):
  """Logical unit scheduling the powercycle of a single node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not given

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target

    targets_master = (target == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are requested at all: powercycling is a last-resort
    operation which must not wait for other jobs.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Asks the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
6063
6064
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dictionary with version constants, the architecture of the
        machine running this code and the cluster-level settings; the
        hypervisor-related entries are restricted to enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, keeping enabled hypervisors only
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6135
6136
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Checks the requested fields against the supported ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Computes the value of each requested field.

    @return: list of values, in the order of C{self.op.output_fields}
    @raise errors.ParameterError: for a field which is not handled here

    """
    # One zero-argument callable per supported field
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": self.cfg.GetVGName,
      }

    collected = []
    for name in self.op.output_fields:
      if name not in getters:
        raise errors.ParameterError(name)
      collected.append(getters[name]())
    return collected
6174
6175
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit activating the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and its primary
    node is passed through the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed while assembling the disks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (success, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if success:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
6213
6214
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are assembled in two passes: first on every node of each
  disk's node tree in secondary mode, then on the primary node only in
  primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass are only logged and do not make the
      function report a failure
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: C{(disks_ok, device_info)}; C{disks_ok} is False if any
      assembly counted as a failure, C{device_info} is a list with one
      C{(primary_node, iv_name, dev_path)} entry per disk, where
      C{dev_path} is the payload of the primary-mode assembly or None

  """
  primary = instance.primary_node
  inst_name = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  all_ok = True
  assembled = []

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # Pass one: every node of the tree, secondary mode
  for (index, disk) in enumerate(disks):
    for (node, node_disk) in disk.ComputeNodeTree(primary):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      fail_msg = lu.rpc.call_blockdev_assemble(node, node_disk, inst_name,
                                               False, index).fail_msg
      if not fail_msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         disk.iv_name, node, fail_msg)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # Pass two: primary node only, primary mode
  for (index, disk) in enumerate(disks):
    device_path = None

    for (node, node_disk) in disk.ComputeNodeTree(primary):
      if node == primary:
        if ignore_size:
          node_disk = node_disk.Copy()
          node_disk.UnsetSize()
        lu.cfg.SetDiskID(node_disk, node)
        rpc_result = lu.rpc.call_blockdev_assemble(node, node_disk, inst_name,
                                                   True, index)
        fail_msg = rpc_result.fail_msg
        if fail_msg:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             disk.iv_name, node, fail_msg)
          all_ok = False
        else:
          device_path = rpc_result.payload

    assembled.append((primary, disk.iv_name, device_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, primary)

  return all_ok, assembled
6301
6302
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If assembling the disks fails, they are shut down again and an error
  is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly; a
      false value other than None additionally triggers a hint about
      '--force' on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # The hint is pointless if force was given or is not applicable (None)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6316
6317
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force option the disks are shut down directly; otherwise
    the variant which first checks the instance state is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
6352
6353
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  The instance state is checked against C{INSTANCE_DOWN} first; only
  afterwards is the work handed over to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
6363
6364
6365 def _ExpandCheckDisks(instance, disks):
6366   """Return the instance disks selected by the disks list
6367
6368   @type disks: list of L{objects.Disk} or None
6369   @param disks: selected disks
6370   @rtype: list of L{objects.Disk}
6371   @return: selected instance disks to act on
6372
6373   """
6374   if disks is None:
6375     return instance.disks
6376   else:
6377     if not set(disks).issubset(instance.disks):
6378       raise errors.ProgrammerError("Can only act on disks belonging to the"
6379                                    " target instance")
6380     return disks
6381
6382
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted for every (node, device) pair returned by
  the C{ComputeNodeTree} method of each selected disk. Every failure is
  logged as a warning, but not every failure affects the result:

    - a failure on the primary node counts unless C{ignore_primary} is
      true
    - a failure on any other node counts unless the RPC result is
      flagged as offline

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to act on, validated by L{_ExpandCheckDisks};
      C{None} means all disks of the instance
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node do not
      influence the return value
  @rtype: boolean
  @return: C{False} if at least one failure counted, C{True} otherwise

  """
  success = True
  pnode = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(pnode):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)

      if node == pnode:
        counts = not ignore_primary
      else:
        counts = not result.offline

      if counts:
        success = False

  return success
6407
6408
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least a given amount of free memory.

  The node is queried via the C{node_info} RPC call and the
  C{memory_free} value reported for the hypervisor is compared with the
  requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  # Only the single hypervisor entry of the payload is of interest
  (_, _, (hv_info, )) = node_result.payload
  free_mem = hv_info.get("memory_free", None)

  if not isinstance(free_mem, int):
    err_msg = ("Can't compute free memory on node %s, result was '%s'" %
               (node, free_mem))
    raise errors.OpPrereqError(err_msg, errors.ECODE_ENVIRON)

  if requested > free_mem:
    err_msg = ("Not enough memory on node %s for %s:"
               " needed %s MiB, available %s MiB" %
               (node, reason, requested, free_mem))
    raise errors.OpPrereqError(err_msg, errors.ECODE_NORES)

  return free_mem
6449
6450
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify the free disk space of nodes for several volume groups.

  For every volume group in C{req_sizes} the check is delegated to
  L{_CheckNodesFreeDiskOnVG}, which raises if any node cannot be
  queried or doesn't have the requested amount of free space.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
6472
6473
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify the free disk space of nodes in a single volume group.

  All nodes are queried with one C{node_info} RPC call; the C{vg_free}
  value each of them reports is then compared with the requested
  amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, [vg], None)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # Exactly one volume group was requested, hence one entry is expected
    (_, (vg_info, ), _) = node_result.payload
    vg_free = vg_info.get("vg_free", None)

    if not isinstance(vg_free, int):
      err_msg = ("Can't compute free disk space on node"
                 " %s for vg %s, result was '%s'" %
                 (node, vg, vg_free))
      raise errors.OpPrereqError(err_msg, errors.ECODE_ENVIRON)

    if requested > vg_free:
      err_msg = ("Not enough disk space on target node %s"
                 " vg %s: required %d MiB, available %d MiB" %
                 (node, vg, requested, vg_free))
      raise errors.OpPrereqError(err_msg, errors.ECODE_NORES)
6510
6511
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Verify that nodes have a minimum number of physical CPUs.

  All nodes are queried with one C{node_info} RPC call; the
  C{cpu_total} value reported for the hypervisor is then compared with
  the requested number.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # Exactly one hypervisor was requested, hence one entry is expected
    (_, _, (hv_info, )) = node_result.payload
    num_cpus = hv_info.get("cpu_total", None)

    if not isinstance(num_cpus, int):
      err_msg = ("Can't compute the number of physical CPUs"
                 " on node %s, result was '%s'" %
                 (node, num_cpus))
      raise errors.OpPrereqError(err_msg, errors.ECODE_ENVIRON)

    if requested > num_cpus:
      err_msg = ("Node %s has %s physical CPUs, but %s are "
                 "required" % (node, num_cpus, requested))
      raise errors.OpPrereqError(err_msg, errors.ECODE_NORES)
6545
6546
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the backend parameter overrides, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return

    # the dictionary is modified in place
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and request recalculation of the resource locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the resources of the primary node only.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes = nodes + list(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    hypervisor parameter overrides and, unless an offline primary node
    is being ignored, verifies that the primary node is able to start
    the instance (node online, bridges present, enough free memory if
    the instance isn't running already).

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    hvparams = self.op.hvparams
    if hvparams:
      # check hypervisor parameter syntax (locally) ...
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(filled_hvp)
      # ... and on all nodes of the instance
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      # nothing can be verified on an offline node
      return

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    if remote_info.payload:
      # running already, no need to check the memory
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is marked as up in the
    configuration before anything else is done. If the start fails, the
    instance's disks are shut down again.

    """
    instance = self.instance

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, self.op.force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
6667
6668
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    opcode = self.op
    env = {
      "IGNORE_SECONDARIES": opcode.ignore_secondaries,
      "REBOOT_TYPE": opcode.reboot_type,
      "SHUTDOWN_TIMEOUT": opcode.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes = nodes + list(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its state
    matches C{INSTANCE_ONLINE}, that its primary node is online and
    that the bridges it needs exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the C{instance_reboot} RPC call. In all other cases the instance is
    shut down (if it is running), its disks are restarted and the
    instance is started again. Finally the instance is marked as up.

    """
    instance = self.instance
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    instance_running = bool(remote_info.payload)

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if instance_running and self.op.reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance,
                                             self.op.reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # don't leave the disks active if the instance isn't running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
6761
6762
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes = nodes + list(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its state
    matches C{INSTANCE_ONLINE}. The primary node must be online unless
    it is offline and the opcode asks for offline nodes to be ignored.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is marked as down in the
    configuration first. A failure of the shutdown RPC call is only
    logged; the disks are shut down in any case.

    """
    instance = self.instance
    pnode = instance.primary_node

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance,
                                             self.op.timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
6832
6833
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the operating system of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes = nodes + list(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    Additionally all of its nodes must be online, it must have disks
    and, if given, the new OS and the OS parameters are verified.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    new_os = self.op.os_type
    if new_os is None:
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, new_os, self.op.force_variant)
      instance_os = new_os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      # the new dict (without defaults)
      self.os_inst = i_osdict
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration before the OS
    create scripts are run. The disks are activated for the duration of
    the installation and shut down again afterwards, whether or not the
    installation succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      failure_msg = ("Could not install OS for instance %s on node %s" %
                     (inst.name, inst.primary_node))
      result.Raise(failure_msg)
    finally:
      _ShutdownInstanceDisks(self, inst)
6923
6924
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    """Normalize and validate the list of disks to recreate.

    After this method C{self.op.disks} is a list of C{(index, params)}
    tuples without duplicate indices, whose parameters only contain
    modifiable keys.

    @raise errors.OpPrereqError: if a disk was specified more than once
        or an unmodifiable parameter is to be changed

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and, if given, the replacement nodes.

    The instance's own nodes are appended in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the node locks and mirror them as node resource locks.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also computes C{self.disks}, a dictionary mapping the index of
    every disk to recreate to the parameter changes requested for it.

    @raise errors.OpPrereqError: if the number of replacement nodes
        doesn't match, the instance has no disks, a disk index is
        invalid or a partial recreation is combined with a node change

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # no explicit selection means all disks, without parameter changes
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # list() keeps the comparison meaningful even where range() doesn't
    # return a list
    if (self.op.nodes and
        sorted(self.disks.keys()) != list(range(len(instance.disks)))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    All modifications (new DRBD logical IDs, changed size or mode, new
    primary node) are computed first and then applied in one go. The
    configuration is updated whenever any of them was applied, before
    the disks are actually created.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.disks:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      changes = self.disks[idx]

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # Whether the instance object was changed and has to be written back
    modified = False

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
        modified = True
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))
        modified = True

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      modified = True

    # Size/mode changes must be written back even if the nodes stay the
    # same; otherwise they would only exist on the in-memory objects
    if modified:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
7116
7117
7118 class LUInstanceRename(LogicalUnit):
7119   """Rename an instance.
7120
7121   """
7122   HPATH = "instance-rename"
7123   HTYPE = constants.HTYPE_INSTANCE
7124
7125   def CheckArguments(self):
7126     """Check arguments.
7127
7128     """
7129     if self.op.ip_check and not self.op.name_check:
7130       # TODO: make the ip check more flexible and not depend on the name check
7131       raise errors.OpPrereqError("IP address check requires a name check",
7132                                  errors.ECODE_INVAL)
7133
7134   def BuildHooksEnv(self):
7135     """Build hooks env.
7136
7137     This runs on master, primary and secondary nodes of the instance.
7138
7139     """
7140     env = _BuildInstanceHookEnvByObject(self, self.instance)
7141     env["INSTANCE_NEW_NAME"] = self.op.new_name
7142     return env
7143
7144   def BuildHooksNodes(self):
7145     """Build hooks nodes.
7146
7147     """
7148     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7149     return (nl, nl)
7150
7151   def CheckPrereq(self):
7152     """Check prerequisites.
7153
7154     This checks that the instance is in the cluster and is not running.
7155
7156     """
7157     self.op.instance_name = _ExpandInstanceName(self.cfg,
7158                                                 self.op.instance_name)
7159     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7160     assert instance is not None
7161     _CheckNodeOnline(self, instance.primary_node)
7162     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7163                         msg="cannot rename")
7164     self.instance = instance
7165
7166     new_name = self.op.new_name
7167     if self.op.name_check:
7168       hostname = netutils.GetHostname(name=new_name)
7169       if hostname.name != new_name:
7170         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7171                      hostname.name)
7172       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7173         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7174                                     " same as given hostname '%s'") %
7175                                     (hostname.name, self.op.new_name),
7176                                     errors.ECODE_INVAL)
7177       new_name = self.op.new_name = hostname.name
7178       if (self.op.ip_check and
7179           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7180         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7181                                    (hostname.ip, new_name),
7182                                    errors.ECODE_NOTUNIQUE)
7183
7184     instance_list = self.cfg.GetInstanceList()
7185     if new_name in instance_list and new_name != instance.name:
7186       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7187                                  new_name, errors.ECODE_EXISTS)
7188
  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration, its instance lock is
    replaced by one carrying the new name, the file storage directory is
    renamed for file-based disk templates and finally the OS rename script
    is run with the instance's disks activated.

    @param feedback_fn: not used by this method
    @return: the name of the instance as re-read from the configuration
        after the rename

    """
    inst = self.instance
    old_name = inst.name

    # For file-based templates remember the directory in use before the
    # rename; it is derived from the first disk's logical_id[1]
    # (presumably the disk's file path -- confirm against the disk objects)
    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      # The new directory is taken from the re-read instance object
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      # The configuration has already been changed at this point, hence the
      # hint in the error message
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    # The disks are activated only for the duration of the OS rename script
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # A failing rename script is only reported as a warning; the rename
        # in the configuration is kept
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7236
7237
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  The instance is shut down on its primary node; afterwards its disks and
  its configuration entry are removed through L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    """
    self._ExpandAndLockInstance()
    # Both node levels start out empty and are filled in by DeclareLocks
    for node_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[node_level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks and the matching node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE_RES:
      # The resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The generic per-instance environment is extended with the shutdown
    timeout given in the opcode.

    @return: the hooks environment

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env.update({"SHUTDOWN_TIMEOUT": self.op.shutdown_timeout})
    return hook_env

  def BuildHooksNodes(self):
    """Build the lists of nodes on which the hooks are run.

    @rtype: tuple
    @return: the master node for the pre phase; all nodes of the instance
        followed by the master node for the post phase

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Look up the (already locked) instance in the configuration.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut the instance down and remove it.

    @param feedback_fn: function used to report a failed shutdown when
        failures are being ignored
    @raise errors.OpExecError: if the shutdown fails and failures are not
        ignored

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    # Sanity checks on the locks held before touching disks and config
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7314
7315
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report a failed disk removal when
      failures are being ignored
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal only produces a
      warning instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # No other instance lock removal may have been scheduled already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule the removal of the instance's lock
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7336
7337
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All of the work is delegated to an L{_InstanceQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand names for this LU.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks of the given level.

    @param level: the locking level being declared

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @param feedback_fn: not used by this method
    @return: the result of the query object's old-style query

    """
    return self.iq.OldStyleQuery(self)
7357
7358
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The work itself is done by a L{TLMigrateInstance} tasklet created with
  C{failover=True}; this class declares the locks and builds the hooks
  environment.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{iallocator} and C{target_node} parameters on the
    LU, using C{None} for parameters the opcode does not have.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Expand names, prepare the lock declarations and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # Node and node resource locks are computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the instance's own nodes are locked.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. locking.ALL_SET (which may have been stored above)
      # is a sentinel value and not a list of names, so it has to be passed
      # on as-is instead of being sliced
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the generic per-instance environment this exports the failover
    parameters and the old/new primary and secondary nodes. The secondary
    node variables are empty strings unless the disk template is
    internally mirrored.

    @return: the hooks environment

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # The current primary becomes the new secondary
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the master and the secondary nodes for the pre phase; the same
        nodes plus the primary node for the post phase

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7446
7447
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  The work itself is done by a L{TLMigrateInstance} tasklet created with
  C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names, prepare the lock declarations and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # Node and node resource locks are computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # The node resource level has to be declared too (as LUInstanceFailover
    # does), so that DeclareLocks can fill it with a copy of the node locks
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates the instance's own nodes are locked.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. locking.ALL_SET (which may have been stored above)
      # is a sentinel value and not a list of names, so it has to be passed
      # on as-is instead of being sliced
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance environment is extended with the migration
    parameters and the old/new primary and secondary nodes. The secondary
    node variables are C{None} unless the disk template is internally
    mirrored.

    @return: the hooks environment

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # Primary and secondary node swap their roles
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the master and the secondary nodes for the pre phase; the same
        nodes plus the primary node for the post phase

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7530
7531
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node, the disk contents are copied over and the
  instance is started on the target node if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance and target node names and declare the locks.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    # The target node is known up front; further node locks are added in
    # DeclareLocks, hence the append mode
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node locks and the matching node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains the target node and the shutdown timeout in
    addition to the generic per-instance variables.

    @return: the hooks environment

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list (master node, the instance's primary node and
        the target node) for both the pre and the post phase

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target node
    differs from the current primary node, that all disks are of a type
    which can be copied and that the target node is able to take over the
    instance (online, not drained, VM-capable, instance policy, free memory
    if the instance is marked up, bridges).

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # Only disks of type LD_LV or LD_FILE are accepted for copying
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function passed on to the configuration update
    @raise errors.OpExecError: if the shutdown fails (unless consistency is
        ignored), if copying the disks fails, or if the instance cannot be
        started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      # Clean up whatever was created on the target node, then re-raise
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # The first failure aborts the copy of the remaining disks
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      # Remove the disks from the target node; the error raised in the
      # finally clause is reported even if the removal itself fails
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # All data has been copied: switch the primary node in the configuration
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        # Do not leave the disks active if the instance could not be started
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7727
7728
class LUNodeMigrate(LogicalUnit):
  """Logical unit migrating all primary instances away from a node.

  No migration is done here; one instance migration job is generated for
  each primary instance of the node and returned for submission.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Nothing to check.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and request a shared lock on the node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: the node name and the runtime changes flag

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    hook_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes on which the hooks are run.

    @rtype: tuple
    @return: the same list containing only the master node for both the
        pre and the post phase

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Nothing to check.

    """
    pass

  def Exec(self, feedback_fn):
    """Generate one migration job per primary instance of the node.

    @param feedback_fn: not used by this method
    @return: a L{ResultWithJobs} object holding the generated jobs

    """
    allow_runtime_changes = self.op.allow_runtime_changes

    # Each job consists of a single instance migration opcode
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = \
        opcodes.OpInstanceMigrate(instance_name=inst.name,
                                  mode=self.op.mode,
                                  live=self.op.live,
                                  iallocator=self.op.iallocator,
                                  target_node=self.op.target_node,
                                  allow_runtime_changes=allow_runtime_changes,
                                  ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7792
7793
7794 class TLMigrateInstance(Tasklet):
7795   """Tasklet class for instance migration.
7796
7797   @type live: boolean
7798   @ivar live: whether the migration will be done live or non-live;
7799       this variable is initalized only after CheckPrereq has run
7800   @type cleanup: boolean
7801   @ivar cleanup: Wheater we cleanup from a failed migration
7802   @type iallocator: string
7803   @ivar iallocator: The iallocator used to determine target_node
7804   @type target_node: string
7805   @ivar target_node: If given, the target_node to reallocate the instance to
7806   @type failover: boolean
7807   @ivar failover: Whether operation results in failover or migration
7808   @type fallback: boolean
7809   @ivar fallback: Whether fallback to failover is allowed if migration not
7810                   possible
7811   @type ignore_consistency: boolean
7812   @ivar ignore_consistency: Wheter we should ignore consistency between source
7813                             and target node
7814   @type shutdown_timeout: int
7815   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7816   @type ignore_ipolicy: bool
7817   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7818
7819   """
7820
7821   # Constants
7822   _MIGRATION_POLL_INTERVAL = 1      # seconds
7823   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7824
7825   def __init__(self, lu, instance_name, cleanup=False,
7826                failover=False, fallback=False,
7827                ignore_consistency=False,
7828                allow_runtime_changes=True,
7829                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7830                ignore_ipolicy=False):
7831     """Initializes this class.
7832
7833     """
7834     Tasklet.__init__(self, lu)
7835
7836     # Parameters
7837     self.instance_name = instance_name
7838     self.cleanup = cleanup
7839     self.live = False # will be overridden later
7840     self.failover = failover
7841     self.fallback = fallback
7842     self.ignore_consistency = ignore_consistency
7843     self.shutdown_timeout = shutdown_timeout
7844     self.ignore_ipolicy = ignore_ipolicy
7845     self.allow_runtime_changes = allow_runtime_changes
7846
7847   def CheckPrereq(self):
7848     """Check prerequisites.
7849
7850     This checks that the instance is in the cluster.
7851
7852     """
7853     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7854     instance = self.cfg.GetInstanceInfo(instance_name)
7855     assert instance is not None
7856     self.instance = instance
7857     cluster = self.cfg.GetClusterInfo()
7858
7859     if (not self.cleanup and
7860         not instance.admin_state == constants.ADMINST_UP and
7861         not self.failover and self.fallback):
7862       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7863                       " switching to failover")
7864       self.failover = True
7865
7866     if instance.disk_template not in constants.DTS_MIRRORED:
7867       if self.failover:
7868         text = "failovers"
7869       else:
7870         text = "migrations"
7871       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7872                                  " %s" % (instance.disk_template, text),
7873                                  errors.ECODE_STATE)
7874
7875     if instance.disk_template in constants.DTS_EXT_MIRROR:
7876       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7877
7878       if self.lu.op.iallocator:
7879         self._RunAllocator()
7880       else:
7881         # We set set self.target_node as it is required by
7882         # BuildHooksEnv
7883         self.target_node = self.lu.op.target_node
7884
7885       # Check that the target node is correct in terms of instance policy
7886       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7887       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7888       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7889       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7890                               ignore=self.ignore_ipolicy)
7891
7892       # self.target_node is already populated, either directly or by the
7893       # iallocator run
7894       target_node = self.target_node
7895       if self.target_node == instance.primary_node:
7896         raise errors.OpPrereqError("Cannot migrate instance %s"
7897                                    " to its primary (%s)" %
7898                                    (instance.name, instance.primary_node))
7899
7900       if len(self.lu.tasklets) == 1:
7901         # It is safe to release locks only when we're the only tasklet
7902         # in the LU
7903         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7904                       keep=[instance.primary_node, self.target_node])
7905
7906     else:
7907       secondary_nodes = instance.secondary_nodes
7908       if not secondary_nodes:
7909         raise errors.ConfigurationError("No secondary node but using"
7910                                         " %s disk template" %
7911                                         instance.disk_template)
7912       target_node = secondary_nodes[0]
7913       if self.lu.op.iallocator or (self.lu.op.target_node and
7914                                    self.lu.op.target_node != target_node):
7915         if self.failover:
7916           text = "failed over"
7917         else:
7918           text = "migrated"
7919         raise errors.OpPrereqError("Instances with disk template %s cannot"
7920                                    " be %s to arbitrary nodes"
7921                                    " (neither an iallocator nor a target"
7922                                    " node can be passed)" %
7923                                    (instance.disk_template, text),
7924                                    errors.ECODE_INVAL)
7925       nodeinfo = self.cfg.GetNodeInfo(target_node)
7926       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7927       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7928       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7929                               ignore=self.ignore_ipolicy)
7930
7931     i_be = cluster.FillBE(instance)
7932
7933     # check memory requirements on the secondary node
7934     if (not self.cleanup and
7935          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7936       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7937                                                "migrating instance %s" %
7938                                                instance.name,
7939                                                i_be[constants.BE_MINMEM],
7940                                                instance.hypervisor)
7941     else:
7942       self.lu.LogInfo("Not checking memory on the secondary node as"
7943                       " instance will not be started")
7944
7945     # check if failover must be forced instead of migration
7946     if (not self.cleanup and not self.failover and
7947         i_be[constants.BE_ALWAYS_FAILOVER]):
7948       if self.fallback:
7949         self.lu.LogInfo("Instance configured to always failover; fallback"
7950                         " to failover")
7951         self.failover = True
7952       else:
7953         raise errors.OpPrereqError("This instance has been configured to"
7954                                    " always failover, please allow failover",
7955                                    errors.ECODE_STATE)
7956
7957     # check bridge existance
7958     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7959
7960     if not self.cleanup:
7961       _CheckNodeNotDrained(self.lu, target_node)
7962       if not self.failover:
7963         result = self.rpc.call_instance_migratable(instance.primary_node,
7964                                                    instance)
7965         if result.fail_msg and self.fallback:
7966           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7967                           " failover")
7968           self.failover = True
7969         else:
7970           result.Raise("Can't migrate, please use failover",
7971                        prereq=True, ecode=errors.ECODE_STATE)
7972
7973     assert not (self.failover and self.cleanup)
7974
7975     if not self.failover:
7976       if self.lu.op.live is not None and self.lu.op.mode is not None:
7977         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7978                                    " parameters are accepted",
7979                                    errors.ECODE_INVAL)
7980       if self.lu.op.live is not None:
7981         if self.lu.op.live:
7982           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7983         else:
7984           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7985         # reset the 'live' parameter to None so that repeated
7986         # invocations of CheckPrereq do not raise an exception
7987         self.lu.op.live = None
7988       elif self.lu.op.mode is None:
7989         # read the default value from the hypervisor
7990         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7991         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7992
7993       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7994     else:
7995       # Failover is never live
7996       self.live = False
7997
7998     if not (self.failover or self.cleanup):
7999       remote_info = self.rpc.call_instance_info(instance.primary_node,
8000                                                 instance.name,
8001                                                 instance.hypervisor)
8002       remote_info.Raise("Error checking instance on node %s" %
8003                         instance.primary_node)
8004       instance_running = bool(remote_info.payload)
8005       if instance_running:
8006         self.current_mem = int(remote_info.payload["memory"])
8007
8008   def _RunAllocator(self):
8009     """Run the allocator based on input opcode.
8010
8011     """
8012     # FIXME: add a self.ignore_ipolicy option
8013     ial = IAllocator(self.cfg, self.rpc,
8014                      mode=constants.IALLOCATOR_MODE_RELOC,
8015                      name=self.instance_name,
8016                      # TODO See why hail breaks with a single node below
8017                      relocate_from=[self.instance.primary_node,
8018                                     self.instance.primary_node],
8019                      )
8020
8021     ial.Run(self.lu.op.iallocator)
8022
8023     if not ial.success:
8024       raise errors.OpPrereqError("Can't compute nodes using"
8025                                  " iallocator '%s': %s" %
8026                                  (self.lu.op.iallocator, ial.info),
8027                                  errors.ECODE_NORES)
8028     if len(ial.result) != ial.required_nodes:
8029       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8030                                  " of nodes (%s), required %s" %
8031                                  (self.lu.op.iallocator, len(ial.result),
8032                                   ial.required_nodes), errors.ECODE_FAULT)
8033     self.target_node = ial.result[0]
8034     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8035                  self.instance_name, self.lu.op.iallocator,
8036                  utils.CommaJoin(ial.result))
8037
8038   def _WaitUntilSync(self):
8039     """Poll with custom rpc for disk sync.
8040
8041     This uses our own step-based rpc call.
8042
8043     """
8044     self.feedback_fn("* wait until resync is done")
8045     all_done = False
8046     while not all_done:
8047       all_done = True
8048       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8049                                             self.nodes_ip,
8050                                             self.instance.disks)
8051       min_percent = 100
8052       for node, nres in result.items():
8053         nres.Raise("Cannot resync disks on node %s" % node)
8054         node_done, node_percent = nres.payload
8055         all_done = all_done and node_done
8056         if node_percent is not None:
8057           min_percent = min(min_percent, node_percent)
8058       if not all_done:
8059         if min_percent < 100:
8060           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8061         time.sleep(2)
8062
8063   def _EnsureSecondary(self, node):
8064     """Demote a node to secondary.
8065
8066     """
8067     self.feedback_fn("* switching node %s to secondary mode" % node)
8068
8069     for dev in self.instance.disks:
8070       self.cfg.SetDiskID(dev, node)
8071
8072     result = self.rpc.call_blockdev_close(node, self.instance.name,
8073                                           self.instance.disks)
8074     result.Raise("Cannot change disk to secondary on node %s" % node)
8075
8076   def _GoStandalone(self):
8077     """Disconnect from the network.
8078
8079     """
8080     self.feedback_fn("* changing into standalone mode")
8081     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8082                                                self.instance.disks)
8083     for node, nres in result.items():
8084       nres.Raise("Cannot disconnect disks node %s" % node)
8085
8086   def _GoReconnect(self, multimaster):
8087     """Reconnect to the network.
8088
8089     """
8090     if multimaster:
8091       msg = "dual-master"
8092     else:
8093       msg = "single-master"
8094     self.feedback_fn("* changing disks into %s mode" % msg)
8095     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8096                                            self.instance.disks,
8097                                            self.instance.name, multimaster)
8098     for node, nres in result.items():
8099       nres.Raise("Cannot change disks config on node %s" % node)
8100
8101   def _ExecCleanup(self):
8102     """Try to cleanup after a failed migration.
8103
8104     The cleanup is done by:
8105       - check that the instance is running only on one node
8106         (and update the config if needed)
8107       - change disks on its secondary node to secondary
8108       - wait until disks are fully synchronized
8109       - disconnect from the network
8110       - change disks into single-master mode
8111       - wait again until disks are fully synchronized
8112
8113     """
8114     instance = self.instance
8115     target_node = self.target_node
8116     source_node = self.source_node
8117
8118     # check running on only one node
8119     self.feedback_fn("* checking where the instance actually runs"
8120                      " (if this hangs, the hypervisor might be in"
8121                      " a bad state)")
8122     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8123     for node, result in ins_l.items():
8124       result.Raise("Can't contact node %s" % node)
8125
8126     runningon_source = instance.name in ins_l[source_node].payload
8127     runningon_target = instance.name in ins_l[target_node].payload
8128
8129     if runningon_source and runningon_target:
8130       raise errors.OpExecError("Instance seems to be running on two nodes,"
8131                                " or the hypervisor is confused; you will have"
8132                                " to ensure manually that it runs only on one"
8133                                " and restart this operation")
8134
8135     if not (runningon_source or runningon_target):
8136       raise errors.OpExecError("Instance does not seem to be running at all;"
8137                                " in this case it's safer to repair by"
8138                                " running 'gnt-instance stop' to ensure disk"
8139                                " shutdown, and then restarting it")
8140
8141     if runningon_target:
8142       # the migration has actually succeeded, we need to update the config
8143       self.feedback_fn("* instance running on secondary node (%s),"
8144                        " updating config" % target_node)
8145       instance.primary_node = target_node
8146       self.cfg.Update(instance, self.feedback_fn)
8147       demoted_node = source_node
8148     else:
8149       self.feedback_fn("* instance confirmed to be running on its"
8150                        " primary node (%s)" % source_node)
8151       demoted_node = target_node
8152
8153     if instance.disk_template in constants.DTS_INT_MIRROR:
8154       self._EnsureSecondary(demoted_node)
8155       try:
8156         self._WaitUntilSync()
8157       except errors.OpExecError:
8158         # we ignore here errors, since if the device is standalone, it
8159         # won't be able to sync
8160         pass
8161       self._GoStandalone()
8162       self._GoReconnect(False)
8163       self._WaitUntilSync()
8164
8165     self.feedback_fn("* done")
8166
8167   def _RevertDiskStatus(self):
8168     """Try to revert the disk status after a failed migration.
8169
8170     """
8171     target_node = self.target_node
8172     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8173       return
8174
8175     try:
8176       self._EnsureSecondary(target_node)
8177       self._GoStandalone()
8178       self._GoReconnect(False)
8179       self._WaitUntilSync()
8180     except errors.OpExecError, err:
8181       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8182                          " please try to recover the instance manually;"
8183                          " error '%s'" % str(err))
8184
8185   def _AbortMigration(self):
8186     """Call the hypervisor code to abort a started migration.
8187
8188     """
8189     instance = self.instance
8190     target_node = self.target_node
8191     source_node = self.source_node
8192     migration_info = self.migration_info
8193
8194     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8195                                                                  instance,
8196                                                                  migration_info,
8197                                                                  False)
8198     abort_msg = abort_result.fail_msg
8199     if abort_msg:
8200       logging.error("Aborting migration failed on target node %s: %s",
8201                     target_node, abort_msg)
8202       # Don't raise an exception here, as we stil have to try to revert the
8203       # disk status, even if this step failed.
8204
8205     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8206         instance, False, self.live)
8207     abort_msg = abort_result.fail_msg
8208     if abort_msg:
8209       logging.error("Aborting migration failed on source node %s: %s",
8210                     source_node, abort_msg)
8211
8212   def _ExecMigration(self):
8213     """Migrate an instance.
8214
8215     The migrate is done by:
8216       - change the disks into dual-master mode
8217       - wait until disks are fully synchronized again
8218       - migrate the instance
8219       - change disks on the new secondary node (the old primary) to secondary
8220       - wait until disks are fully synchronized
8221       - change disks into single-master mode
8222
8223     """
8224     instance = self.instance
8225     target_node = self.target_node
8226     source_node = self.source_node
8227
8228     # Check for hypervisor version mismatch and warn the user.
8229     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8230                                        None, [self.instance.hypervisor])
8231     for ninfo in nodeinfo.values():
8232       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8233                   ninfo.node)
8234     (_, _, (src_info, )) = nodeinfo[source_node].payload
8235     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8236
8237     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8238         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8239       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8240       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8241       if src_version != dst_version:
8242         self.feedback_fn("* warning: hypervisor version mismatch between"
8243                          " source (%s) and target (%s) node" %
8244                          (src_version, dst_version))
8245
8246     self.feedback_fn("* checking disk consistency between source and target")
8247     for (idx, dev) in enumerate(instance.disks):
8248       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8249         raise errors.OpExecError("Disk %s is degraded or not fully"
8250                                  " synchronized on target node,"
8251                                  " aborting migration" % idx)
8252
8253     if self.current_mem > self.tgt_free_mem:
8254       if not self.allow_runtime_changes:
8255         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8256                                  " free memory to fit instance %s on target"
8257                                  " node %s (have %dMB, need %dMB)" %
8258                                  (instance.name, target_node,
8259                                   self.tgt_free_mem, self.current_mem))
8260       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8261       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8262                                                      instance,
8263                                                      self.tgt_free_mem)
8264       rpcres.Raise("Cannot modify instance runtime memory")
8265
8266     # First get the migration information from the remote node
8267     result = self.rpc.call_migration_info(source_node, instance)
8268     msg = result.fail_msg
8269     if msg:
8270       log_err = ("Failed fetching source migration information from %s: %s" %
8271                  (source_node, msg))
8272       logging.error(log_err)
8273       raise errors.OpExecError(log_err)
8274
8275     self.migration_info = migration_info = result.payload
8276
8277     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8278       # Then switch the disks to master/master mode
8279       self._EnsureSecondary(target_node)
8280       self._GoStandalone()
8281       self._GoReconnect(True)
8282       self._WaitUntilSync()
8283
8284     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8285     result = self.rpc.call_accept_instance(target_node,
8286                                            instance,
8287                                            migration_info,
8288                                            self.nodes_ip[target_node])
8289
8290     msg = result.fail_msg
8291     if msg:
8292       logging.error("Instance pre-migration failed, trying to revert"
8293                     " disk status: %s", msg)
8294       self.feedback_fn("Pre-migration failed, aborting")
8295       self._AbortMigration()
8296       self._RevertDiskStatus()
8297       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8298                                (instance.name, msg))
8299
8300     self.feedback_fn("* migrating instance to %s" % target_node)
8301     result = self.rpc.call_instance_migrate(source_node, instance,
8302                                             self.nodes_ip[target_node],
8303                                             self.live)
8304     msg = result.fail_msg
8305     if msg:
8306       logging.error("Instance migration failed, trying to revert"
8307                     " disk status: %s", msg)
8308       self.feedback_fn("Migration failed, aborting")
8309       self._AbortMigration()
8310       self._RevertDiskStatus()
8311       raise errors.OpExecError("Could not migrate instance %s: %s" %
8312                                (instance.name, msg))
8313
8314     self.feedback_fn("* starting memory transfer")
8315     last_feedback = time.time()
8316     while True:
8317       result = self.rpc.call_instance_get_migration_status(source_node,
8318                                                            instance)
8319       msg = result.fail_msg
8320       ms = result.payload   # MigrationStatus instance
8321       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8322         logging.error("Instance migration failed, trying to revert"
8323                       " disk status: %s", msg)
8324         self.feedback_fn("Migration failed, aborting")
8325         self._AbortMigration()
8326         self._RevertDiskStatus()
8327         raise errors.OpExecError("Could not migrate instance %s: %s" %
8328                                  (instance.name, msg))
8329
8330       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8331         self.feedback_fn("* memory transfer complete")
8332         break
8333
8334       if (utils.TimeoutExpired(last_feedback,
8335                                self._MIGRATION_FEEDBACK_INTERVAL) and
8336           ms.transferred_ram is not None):
8337         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8338         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8339         last_feedback = time.time()
8340
8341       time.sleep(self._MIGRATION_POLL_INTERVAL)
8342
8343     result = self.rpc.call_instance_finalize_migration_src(source_node,
8344                                                            instance,
8345                                                            True,
8346                                                            self.live)
8347     msg = result.fail_msg
8348     if msg:
8349       logging.error("Instance migration succeeded, but finalization failed"
8350                     " on the source node: %s", msg)
8351       raise errors.OpExecError("Could not finalize instance migration: %s" %
8352                                msg)
8353
8354     instance.primary_node = target_node
8355
8356     # distribute new instance config to the other nodes
8357     self.cfg.Update(instance, self.feedback_fn)
8358
8359     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8360                                                            instance,
8361                                                            migration_info,
8362                                                            True)
8363     msg = result.fail_msg
8364     if msg:
8365       logging.error("Instance migration succeeded, but finalization failed"
8366                     " on the target node: %s", msg)
8367       raise errors.OpExecError("Could not finalize instance migration: %s" %
8368                                msg)
8369
8370     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8371       self._EnsureSecondary(source_node)
8372       self._WaitUntilSync()
8373       self._GoStandalone()
8374       self._GoReconnect(False)
8375       self._WaitUntilSync()
8376
8377     # If the instance's disk template is `rbd' and there was a successful
8378     # migration, unmap the device from the source node.
8379     if self.instance.disk_template == constants.DT_RBD:
8380       disks = _ExpandCheckDisks(instance, instance.disks)
8381       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8382       for disk in disks:
8383         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8384         msg = result.fail_msg
8385         if msg:
8386           logging.error("Migration was successful, but couldn't unmap the"
8387                         " block device %s on source node %s: %s",
8388                         disk.iv_name, source_node, msg)
8389           logging.error("You need to unmap the device %s manually on %s",
8390                         disk.iv_name, source_node)
8391
8392     self.feedback_fn("* done")
8393
8394   def _ExecFailover(self):
8395     """Failover an instance.
8396
8397     The failover is done by shutting it down on its present node and
8398     starting it on the secondary.
8399
8400     """
8401     instance = self.instance
8402     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8403
8404     source_node = instance.primary_node
8405     target_node = self.target_node
8406
8407     if instance.admin_state == constants.ADMINST_UP:
8408       self.feedback_fn("* checking disk consistency between source and target")
8409       for (idx, dev) in enumerate(instance.disks):
8410         # for drbd, these are drbd over lvm
8411         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8412           if primary_node.offline:
8413             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8414                              " target node %s" %
8415                              (primary_node.name, idx, target_node))
8416           elif not self.ignore_consistency:
8417             raise errors.OpExecError("Disk %s is degraded on target node,"
8418                                      " aborting failover" % idx)
8419     else:
8420       self.feedback_fn("* not checking disk consistency as instance is not"
8421                        " running")
8422
8423     self.feedback_fn("* shutting down instance on source node")
8424     logging.info("Shutting down instance %s on node %s",
8425                  instance.name, source_node)
8426
8427     result = self.rpc.call_instance_shutdown(source_node, instance,
8428                                              self.shutdown_timeout)
8429     msg = result.fail_msg
8430     if msg:
8431       if self.ignore_consistency or primary_node.offline:
8432         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8433                            " proceeding anyway; please make sure node"
8434                            " %s is down; error details: %s",
8435                            instance.name, source_node, source_node, msg)
8436       else:
8437         raise errors.OpExecError("Could not shutdown instance %s on"
8438                                  " node %s: %s" %
8439                                  (instance.name, source_node, msg))
8440
8441     self.feedback_fn("* deactivating the instance's disks on source node")
8442     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8443       raise errors.OpExecError("Can't shut down the instance's disks")
8444
8445     instance.primary_node = target_node
8446     # distribute new instance config to the other nodes
8447     self.cfg.Update(instance, self.feedback_fn)
8448
8449     # Only start the instance if it's marked as up
8450     if instance.admin_state == constants.ADMINST_UP:
8451       self.feedback_fn("* activating the instance's disks on target node %s" %
8452                        target_node)
8453       logging.info("Starting instance %s on node %s",
8454                    instance.name, target_node)
8455
8456       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8457                                            ignore_secondaries=True)
8458       if not disks_ok:
8459         _ShutdownInstanceDisks(self.lu, instance)
8460         raise errors.OpExecError("Can't activate the instance's disks")
8461
8462       self.feedback_fn("* starting the instance on the target node %s" %
8463                        target_node)
8464       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8465                                             False)
8466       msg = result.fail_msg
8467       if msg:
8468         _ShutdownInstanceDisks(self.lu, instance)
8469         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8470                                  (instance.name, target_node, msg))
8471
8472   def Exec(self, feedback_fn):
8473     """Perform the migration.
8474
8475     """
8476     self.feedback_fn = feedback_fn
8477     self.source_node = self.instance.primary_node
8478
8479     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8480     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8481       self.target_node = self.instance.secondary_nodes[0]
8482       # Otherwise self.target_node has been populated either
8483       # directly, or through an iallocator.
8484
8485     self.all_nodes = [self.source_node, self.target_node]
8486     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8487                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8488
8489     if self.failover:
8490       feedback_fn("Failover instance %s" % self.instance.name)
8491       self._ExecFailover()
8492     else:
8493       feedback_fn("Migrating instance %s" % self.instance.name)
8494
8495       if self.cleanup:
8496         return self._ExecCleanup()
8497       else:
8498         return self._ExecMigration()
8499
8500
8501 def _CreateBlockDev(lu, node, instance, device, force_create,
8502                     info, force_open):
8503   """Create a tree of block devices on a given node.
8504
8505   If this device type has to be created on secondaries, create it and
8506   all its children.
8507
8508   If not, just recurse to children keeping the same 'force' value.
8509
8510   @param lu: the lu on whose behalf we execute
8511   @param node: the node on which to create the device
8512   @type instance: L{objects.Instance}
8513   @param instance: the instance which owns the device
8514   @type device: L{objects.Disk}
8515   @param device: the device to create
8516   @type force_create: boolean
8517   @param force_create: whether to force creation of this device; this
8518       will be change to True whenever we find a device which has
8519       CreateOnSecondary() attribute
8520   @param info: the extra 'metadata' we should attach to the device
8521       (this will be represented as a LVM tag)
8522   @type force_open: boolean
8523   @param force_open: this parameter will be passes to the
8524       L{backend.BlockdevCreate} function where it specifies
8525       whether we run on primary or not, and it affects both
8526       the child assembly and the device own Open() execution
8527
8528   """
8529   if device.CreateOnSecondary():
8530     force_create = True
8531
8532   if device.children:
8533     for child in device.children:
8534       _CreateBlockDev(lu, node, instance, child, force_create,
8535                       info, force_open)
8536
8537   if not force_create:
8538     return
8539
8540   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8541
8542
8543 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8544   """Create a single block device on a given node.
8545
8546   This will not recurse over children of the device, so they must be
8547   created in advance.
8548
8549   @param lu: the lu on whose behalf we execute
8550   @param node: the node on which to create the device
8551   @type instance: L{objects.Instance}
8552   @param instance: the instance which owns the device
8553   @type device: L{objects.Disk}
8554   @param device: the device to create
8555   @param info: the extra 'metadata' we should attach to the device
8556       (this will be represented as a LVM tag)
8557   @type force_open: boolean
8558   @param force_open: this parameter will be passes to the
8559       L{backend.BlockdevCreate} function where it specifies
8560       whether we run on primary or not, and it affects both
8561       the child assembly and the device own Open() execution
8562
8563   """
8564   lu.cfg.SetDiskID(device, node)
8565   result = lu.rpc.call_blockdev_create(node, device, device.size,
8566                                        instance.name, force_open, info)
8567   result.Raise("Can't create block device %s on"
8568                " node %s for instance %s" % (device, node, instance.name))
8569   if device.physical_id is None:
8570     device.physical_id = result.payload
8571
8572
8573 def _GenerateUniqueNames(lu, exts):
8574   """Generate a suitable LV name.
8575
8576   This will generate a logical volume name for the given instance.
8577
8578   """
8579   results = []
8580   for val in exts:
8581     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8582     results.append("%s%s" % (new_id, val))
8583   return results
8584
8585
def _ComputeLDParams(disk_template, disk_params):
  """Translate disk template parameters into logical disk parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters;
      dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.
  @raise errors.ProgrammerError: if the disk template is not known

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  ld_params = []
  tpl_params = disk_params[disk_template]

  if disk_template == constants.DT_DRBD8:
    # (LD parameter, DRBD template parameter) pairs for the DRBD device
    drbd_key_map = [
      (constants.LDP_RESYNC_RATE, constants.DRBD_RESYNC_RATE),
      (constants.LDP_BARRIERS, constants.DRBD_DISK_BARRIERS),
      (constants.LDP_NO_META_FLUSH, constants.DRBD_META_BARRIERS),
      (constants.LDP_DEFAULT_METAVG, constants.DRBD_DEFAULT_METAVG),
      (constants.LDP_DISK_CUSTOM, constants.DRBD_DISK_CUSTOM),
      (constants.LDP_NET_CUSTOM, constants.DRBD_NET_CUSTOM),
      (constants.LDP_DYNAMIC_RESYNC, constants.DRBD_DYNAMIC_RESYNC),
      (constants.LDP_PLAN_AHEAD, constants.DRBD_PLAN_AHEAD),
      (constants.LDP_FILL_TARGET, constants.DRBD_FILL_TARGET),
      (constants.LDP_DELAY_TARGET, constants.DRBD_DELAY_TARGET),
      (constants.LDP_MAX_RATE, constants.DRBD_MAX_RATE),
      (constants.LDP_MIN_RATE, constants.DRBD_MIN_RATE),
      ]
    drbd_overrides = dict([(ld_key, tpl_params[tpl_key])
                           for (ld_key, tpl_key) in drbd_key_map])
    ld_params.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_overrides))

    # data LV
    data_overrides = {
      constants.LDP_STRIPES: tpl_params[constants.DRBD_DATA_STRIPES],
      }
    ld_params.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_overrides))

    # metadata LV
    meta_overrides = {
      constants.LDP_STRIPES: tpl_params[constants.DRBD_META_STRIPES],
      }
    ld_params.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_overrides))

  elif disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    ld_params.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])

  elif disk_template == constants.DT_PLAIN:
    lv_overrides = {
      constants.LDP_STRIPES: tpl_params[constants.LV_STRIPES],
      }
    ld_params.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       lv_overrides))

  elif disk_template == constants.DT_BLOCK:
    ld_params.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  elif disk_template == constants.DT_RBD:
    rbd_overrides = {
      constants.LDP_POOL: tpl_params[constants.RBD_POOL],
      }
    ld_params.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
                       rbd_overrides))

  # any other known template yields an empty list
  return ld_params
8669
8670
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration of C{lu};
  the data LV gets the requested size while the metadata LV always has
  C{DRBD_META_SIZE}.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the DRBD logical id
  @param secondary: second node of the DRBD logical id
  @param size: size used for the DRBD device and its data LV
  @param vgnames: two-element sequence, volume groups of (data, meta) LV
  @param names: two-element sequence, LV names of (data, meta) LV
  @param iv_name: the C{iv_name} given to the DRBD device
  @param p_minor: DRBD minor stored for the primary node
  @param s_minor: DRBD minor stored for the secondary node
  @param drbd_params: parameters attached to the DRBD device
  @param data_params: parameters attached to the data LV
  @param meta_params: parameters attached to the metadata LV
  @return: the DRBD8 L{objects.Disk}, with the two LVs as children

  """
  assert len(vgnames) == len(names) == 2
  # Keep this order: the port is allocated before the secret is generated
  drbd_port = lu.cfg.AllocatePort()
  drbd_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  meta_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)

  drbd_logical_id = (primary, secondary, drbd_port,
                     p_minor, s_minor, drbd_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8,
                      size=size,
                      logical_id=drbd_logical_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name,
                      params=drbd_params)
8694
8695
# Prefix put in front of ".disk<N>" when _GenerateDiskTemplate asks for
# unique volume names; disk templates missing from this table do not get
# generated names at all
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type used by _GenerateDiskTemplate for the (single-level)
# disks of each template; DRBD8 is not listed as it is built separately by
# _GenerateDRBD8Branch
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8709
8710
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute; its C{cfg} is
      used for the default volume group, DRBD minors and unique names
  @param template_name: the disk template to generate disks for
  @param instance_name: instance name, passed to the DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must contain exactly
      one node for DRBD8 and be empty for all other non-diskless templates
  @param disk_info: list of disk definitions (dicts); C{IDISK_SIZE} and
      C{IDISK_MODE} are read for every disk, C{IDISK_VG} and
      C{IDISK_METAVG} are optional and C{IDISK_ADOPT} is read for the
      block template
  @param file_storage_dir: directory used in the logical id of
      file-based disks
  @param file_driver: driver used in the logical id of file-based disks
  @param base_index: index of the first generated disk; added to the
      position in C{disk_info} for names and C{iv_name}
  @param feedback_fn: called with a message per disk (not for DRBD8)
  @param disk_params: disk parameters handed to L{_ComputeLDParams}
  @param _req_file_storage: check run for the file template
  @param _req_shr_file_storage: check run for the shared file template
  @return: list of L{objects.Disk}; empty for the diskless template
  @raise errors.ProgrammerError: on a wrong number of secondary nodes or
      an unknown disk template

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    # For DRBD8 the parameter list has three entries: DRBD device, data LV
    # and metadata LV
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk, alternating primary/secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Two LV names per disk: names[2 * i] is the data volume and
    # names[2 * i + 1] the metadata volume of disk i
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      # Per-disk volume groups override the cluster VG / default meta VG
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    # All remaining templates consist of a single device on one node
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    # Only templates listed in _DISK_TEMPLATE_NAME_PREFIX get generated names
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    # logical_id_fn(idx, disk_index, disk) computes the logical id of one
    # disk; idx is the position in disk_info, disk_index includes base_index
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      # Non-DRBD templates use the first entry of the parameter list
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))

  return disks
8804
8805
8806 def _GetInstanceInfoText(instance):
8807   """Compute that text that should be added to the disk's metadata.
8808
8809   """
8810   return "originstname+%s" % instance.name
8811
8812
8813 def _CalcEta(time_taken, written, total_size):
8814   """Calculates the ETA based on size written and total size.
8815
8816   @param time_taken: The time taken so far
8817   @param written: amount written so far
8818   @param total_size: The total size of data to be written
8819   @return: The remaining time in seconds
8820
8821   """
8822   avg_time = time_taken / float(written)
8823   return (total_size - written) * avg_time
8824
8825
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The synchronisation of the disks is paused on the primary node while the
  wipe is running and resumed afterwards, also when wiping fails.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None; a failed pause or wipe RPC is reported by calling
      C{Raise} on the RPC result

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # The per-disk payload is only usable if the RPC itself succeeded, hence
  # fail with a proper error instead of iterating over a missing payload
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for very small disks the truncation would yield zero and the
      # loop below would never advance, so wipe at least one unit per call
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # Starting at zero makes the first progress message appear right
      # after the first chunk
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # Report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Only warn here: raising from the "finally" clause would hide a wipe
    # error which is currently being propagated
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
8893
8894
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first; afterwards every disk
  which is not skipped is created on all relevant nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: None; a failure to create the file storage directory is
      reported by calling C{Raise} on the RPC result

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    first_disk_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_disk_path)
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on node %s" %
                 (file_storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      # Both flags passed to _CreateBlockDev are set only on the primary
      # (or target) node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
8937
8938
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # The storage directory is derived from the first disk, so there is
  # nothing to look up (and nothing to index) if the instance has no disks
  if instance.disk_template == constants.DT_FILE and instance.disks:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the call was actually made on, which differs from
      # the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
8991
8992
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template the sizes are computed for
  @param disks: list of disk definitions (dicts) which must contain the
      C{IDISK_VG} and C{IDISK_SIZE} keys
  @rtype: dict
  @return: required size per volume group name; empty for templates which
      need no space in a volume group
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  def _compute(disks, payload):
    """Sum up disk sizes per volume group.

    @param payload: extra size added for every single disk

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate under the volume group of this disk, so that several
      # disks in the same group add up instead of overwriting each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9023
9024
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement for a disk template.

  @param disk_template: the disk template the size is computed for
  @param disks: list of disk definitions (dicts) containing C{IDISK_SIZE}
  @return: the summed-up size for the plain and DRBD8 templates, zero or
      None for the other known templates
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  plain_size = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_size = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  if disk_template in size_by_template:
    return size_by_template[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
9047
9048
9049 def _FilterVmNodes(lu, nodenames):
9050   """Filters out non-vm_capable nodes from a list.
9051
9052   @type lu: L{LogicalUnit}
9053   @param lu: the logical unit for which we check
9054   @type nodenames: list
9055   @param nodenames: the list of nodes on which we should check
9056   @rtype: list
9057   @return: the list of vm-capable nodes
9058
9059   """
9060   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9061   return [name for name in nodenames if name not in vm_nodes]
9062
9063
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  Shared by instance creation and instance modification: the given
  parameters are laid over the cluster-level ones of the hypervisor and
  the result is validated on all vm-capable nodes of the list; offline
  nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_defaults, hvparams)

  validation = lu.rpc.call_hypervisor_validate_params(nodenames, hvname,
                                                      full_params)
  for node in nodenames:
    node_result = validation[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9092
9093
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)
  for node, nres in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if nres.payload:
      continue
    # An empty payload is reported as "OS not found" on that node
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9122
9123
9124 class LUInstanceCreate(LogicalUnit):
9125   """Create an instance.
9126
9127   """
9128   HPATH = "instance-add"
9129   HTYPE = constants.HTYPE_INSTANCE
9130   REQ_BGL = False
9131
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode parameters which can be checked
    without looking at the cluster state: instance name, NIC and disk
    parameter types, disk adoption consistency, file storage settings,
    node/iallocator selection and the mode-specific (create, import,
    remote import) arguments.

    The following attributes are set for later use: C{adopt_disks},
    C{check_ip} and C{_cds}; C{hostname1} if the name check is enabled;
    C{source_x509_ca_pem}, C{source_x509_ca} and C{source_instance_name}
    for remote imports.

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is restricted to some disk templates and cannot be
      # combined with an iallocator or with an instance import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      # internally mirrored templates need a secondary node, for all other
      # templates a given secondary node is dropped with a warning
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # cluster domain secret, used below for the remote import checks
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # a non-None error code means the certificate was not accepted
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9286
9287   def ExpandNames(self):
9288     """ExpandNames for CreateInstance.
9289
9290     Figure out the right locks for instance creation.
9291
9292     """
9293     self.needed_locks = {}
9294
9295     instance_name = self.op.instance_name
9296     # this is just a preventive check, but someone might still add this
9297     # instance in the meantime, and creation will fail at lock-add time
9298     if instance_name in self.cfg.GetInstanceList():
9299       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9300                                  instance_name, errors.ECODE_EXISTS)
9301
9302     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9303
9304     if self.op.iallocator:
9305       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9306       # specifying a group on instance creation and then selecting nodes from
9307       # that group
9308       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9309       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9310     else:
9311       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9312       nodelist = [self.op.pnode]
9313       if self.op.snode is not None:
9314         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9315         nodelist.append(self.op.snode)
9316       self.needed_locks[locking.LEVEL_NODE] = nodelist
9317       # Lock resources of instance's primary and secondary nodes (copy to
9318       # prevent accidential modification)
9319       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9320
9321     # in case of import lock the source node too
9322     if self.op.mode == constants.INSTANCE_IMPORT:
9323       src_node = self.op.src_node
9324       src_path = self.op.src_path
9325
9326       if src_path is None:
9327         self.op.src_path = src_path = self.op.instance_name
9328
9329       if src_node is None:
9330         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9331         self.op.src_node = None
9332         if os.path.isabs(src_path):
9333           raise errors.OpPrereqError("Importing an instance from a path"
9334                                      " requires a source node option",
9335                                      errors.ECODE_INVAL)
9336       else:
9337         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9338         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9339           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9340         if not os.path.isabs(src_path):
9341           self.op.src_path = src_path = \
9342             utils.PathJoin(constants.EXPORT_DIR, src_path)
9343
9344   def _RunAllocator(self):
9345     """Run the allocator based on input opcode.
9346
9347     """
9348     nics = [n.ToDict() for n in self.nics]
9349     ial = IAllocator(self.cfg, self.rpc,
9350                      mode=constants.IALLOCATOR_MODE_ALLOC,
9351                      name=self.op.instance_name,
9352                      disk_template=self.op.disk_template,
9353                      tags=self.op.tags,
9354                      os=self.op.os_type,
9355                      vcpus=self.be_full[constants.BE_VCPUS],
9356                      memory=self.be_full[constants.BE_MAXMEM],
9357                      spindle_usage=self.be_full[constants.BE_SPINDLE_USAGE],
9358                      disks=self.disks,
9359                      nics=nics,
9360                      hypervisor=self.op.hypervisor,
9361                      )
9362
9363     ial.Run(self.op.iallocator)
9364
9365     if not ial.success:
9366       raise errors.OpPrereqError("Can't compute nodes using"
9367                                  " iallocator '%s': %s" %
9368                                  (self.op.iallocator, ial.info),
9369                                  errors.ECODE_NORES)
9370     if len(ial.result) != ial.required_nodes:
9371       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9372                                  " of nodes (%s), required %s" %
9373                                  (self.op.iallocator, len(ial.result),
9374                                   ial.required_nodes), errors.ECODE_FAULT)
9375     self.op.pnode = ial.result[0]
9376     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9377                  self.op.instance_name, self.op.iallocator,
9378                  utils.CommaJoin(ial.result))
9379     if ial.required_nodes == 2:
9380       self.op.snode = ial.result[1]
9381
9382   def BuildHooksEnv(self):
9383     """Build hooks env.
9384
9385     This runs on master, primary and secondary nodes of the instance.
9386
9387     """
9388     env = {
9389       "ADD_MODE": self.op.mode,
9390       }
9391     if self.op.mode == constants.INSTANCE_IMPORT:
9392       env["SRC_NODE"] = self.op.src_node
9393       env["SRC_PATH"] = self.op.src_path
9394       env["SRC_IMAGES"] = self.src_images
9395
9396     env.update(_BuildInstanceHookEnv(
9397       name=self.op.instance_name,
9398       primary_node=self.op.pnode,
9399       secondary_nodes=self.secondaries,
9400       status=self.op.start,
9401       os_type=self.op.os_type,
9402       minmem=self.be_full[constants.BE_MINMEM],
9403       maxmem=self.be_full[constants.BE_MAXMEM],
9404       vcpus=self.be_full[constants.BE_VCPUS],
9405       nics=_NICListToTuple(self, self.nics),
9406       disk_template=self.op.disk_template,
9407       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9408              for d in self.disks],
9409       bep=self.be_full,
9410       hvp=self.hv_full,
9411       hypervisor_name=self.op.hypervisor,
9412       tags=self.op.tags,
9413     ))
9414
9415     return env
9416
9417   def BuildHooksNodes(self):
9418     """Build hooks nodes.
9419
9420     """
9421     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9422     return nl, nl
9423
9424   def _ReadExportInfo(self):
9425     """Reads the export information from disk.
9426
9427     It will override the opcode source node and path with the actual
9428     information, if these two were not specified before.
9429
9430     @return: the export information
9431
9432     """
9433     assert self.op.mode == constants.INSTANCE_IMPORT
9434
9435     src_node = self.op.src_node
9436     src_path = self.op.src_path
9437
9438     if src_node is None:
9439       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9440       exp_list = self.rpc.call_export_list(locked_nodes)
9441       found = False
9442       for node in exp_list:
9443         if exp_list[node].fail_msg:
9444           continue
9445         if src_path in exp_list[node].payload:
9446           found = True
9447           self.op.src_node = src_node = node
9448           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9449                                                        src_path)
9450           break
9451       if not found:
9452         raise errors.OpPrereqError("No export found for relative path %s" %
9453                                     src_path, errors.ECODE_INVAL)
9454
9455     _CheckNodeOnline(self, src_node)
9456     result = self.rpc.call_export_info(src_node, src_path)
9457     result.Raise("No export or invalid export found in dir %s" % src_path)
9458
9459     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9460     if not export_info.has_section(constants.INISECT_EXP):
9461       raise errors.ProgrammerError("Corrupted export config",
9462                                    errors.ECODE_ENVIRON)
9463
9464     ei_version = export_info.get(constants.INISECT_EXP, "version")
9465     if (int(ei_version) != constants.EXPORT_VERSION):
9466       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9467                                  (ei_version, constants.EXPORT_VERSION),
9468                                  errors.ECODE_ENVIRON)
9469     return export_info
9470
9471   def _ReadExportParams(self, einfo):
9472     """Use export parameters as defaults.
9473
9474     In case the opcode doesn't specify (as in override) some instance
9475     parameters, then try to use them from the export information, if
9476     that declares them.
9477
9478     """
9479     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9480
9481     if self.op.disk_template is None:
9482       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9483         self.op.disk_template = einfo.get(constants.INISECT_INS,
9484                                           "disk_template")
9485         if self.op.disk_template not in constants.DISK_TEMPLATES:
9486           raise errors.OpPrereqError("Disk template specified in configuration"
9487                                      " file is not one of the allowed values:"
9488                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9489       else:
9490         raise errors.OpPrereqError("No disk template specified and the export"
9491                                    " is missing the disk_template information",
9492                                    errors.ECODE_INVAL)
9493
9494     if not self.op.disks:
9495       disks = []
9496       # TODO: import the disk iv_name too
9497       for idx in range(constants.MAX_DISKS):
9498         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9499           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9500           disks.append({constants.IDISK_SIZE: disk_sz})
9501       self.op.disks = disks
9502       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9503         raise errors.OpPrereqError("No disk info specified and the export"
9504                                    " is missing the disk information",
9505                                    errors.ECODE_INVAL)
9506
9507     if not self.op.nics:
9508       nics = []
9509       for idx in range(constants.MAX_NICS):
9510         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9511           ndict = {}
9512           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9513             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9514             ndict[name] = v
9515           nics.append(ndict)
9516         else:
9517           break
9518       self.op.nics = nics
9519
9520     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9521       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9522
9523     if (self.op.hypervisor is None and
9524         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9525       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9526
9527     if einfo.has_section(constants.INISECT_HYP):
9528       # use the export parameters but do not override the ones
9529       # specified by the user
9530       for name, value in einfo.items(constants.INISECT_HYP):
9531         if name not in self.op.hvparams:
9532           self.op.hvparams[name] = value
9533
9534     if einfo.has_section(constants.INISECT_BEP):
9535       # use the parameters, without overriding
9536       for name, value in einfo.items(constants.INISECT_BEP):
9537         if name not in self.op.beparams:
9538           self.op.beparams[name] = value
9539         # Compatibility for the old "memory" be param
9540         if name == constants.BE_MEMORY:
9541           if constants.BE_MAXMEM not in self.op.beparams:
9542             self.op.beparams[constants.BE_MAXMEM] = value
9543           if constants.BE_MINMEM not in self.op.beparams:
9544             self.op.beparams[constants.BE_MINMEM] = value
9545     else:
9546       # try to read the parameters old style, from the main section
9547       for name in constants.BES_PARAMETERS:
9548         if (name not in self.op.beparams and
9549             einfo.has_option(constants.INISECT_INS, name)):
9550           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9551
9552     if einfo.has_section(constants.INISECT_OSP):
9553       # use the parameters, without overriding
9554       for name, value in einfo.items(constants.INISECT_OSP):
9555         if name not in self.op.osparams:
9556           self.op.osparams[name] = value
9557
9558   def _RevertToDefaults(self, cluster):
9559     """Revert the instance parameters to the default values.
9560
9561     """
9562     # hvparams
9563     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9564     for name in self.op.hvparams.keys():
9565       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9566         del self.op.hvparams[name]
9567     # beparams
9568     be_defs = cluster.SimpleFillBE({})
9569     for name in self.op.beparams.keys():
9570       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9571         del self.op.beparams[name]
9572     # nic params
9573     nic_defs = cluster.SimpleFillNIC({})
9574     for nic in self.op.nics:
9575       for name in constants.NICS_PARAMETERS:
9576         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9577           del nic[name]
9578     # osparams
9579     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9580     for name in self.op.osparams.keys():
9581       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9582         del self.op.osparams[name]
9583
9584   def _CalculateFileStorageDir(self):
9585     """Calculate final instance file storage dir.
9586
9587     """
9588     # file storage dir calculation/check
9589     self.instance_file_storage_dir = None
9590     if self.op.disk_template in constants.DTS_FILEBASED:
9591       # build the full file storage dir path
9592       joinargs = []
9593
9594       if self.op.disk_template == constants.DT_SHARED_FILE:
9595         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9596       else:
9597         get_fsd_fn = self.cfg.GetFileStorageDir
9598
9599       cfg_storagedir = get_fsd_fn()
9600       if not cfg_storagedir:
9601         raise errors.OpPrereqError("Cluster file storage dir not defined")
9602       joinargs.append(cfg_storagedir)
9603
9604       if self.op.file_storage_dir is not None:
9605         joinargs.append(self.op.file_storage_dir)
9606
9607       joinargs.append(self.op.instance_name)
9608
9609       # pylint: disable=W0142
9610       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9611
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates the opcode against the cluster configuration and the
    target nodes. Along the way it sets C{self.hv_full},
    C{self.be_full}, C{self.os_full}, C{self.nics}, C{self.disks},
    C{self.pnode}, C{self.secondaries}, C{self.diskparams} and
    C{self.dry_run_result}, plus C{self.src_images} in import mode.

    In import mode, values missing from the opcode are first filled in
    from the export. If an iallocator is given, it is run to select the
    nodes before the node-related checks.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    self._CalculateFileStorageDir()

    # In import mode the export provides defaults for unset opcode values;
    # export_info is used again further below, under the same condition
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # an unset or 'auto' hypervisor is replaced by the type returned by
    # the configuration
    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    # replace 'auto' values with the cluster defaults; only values of
    # existing keys are changed, so the dict size stays the same
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      # nic_mode_req keeps what was requested; nic_mode is the effective
      # mode, falling back to the cluster default when unset or 'auto'
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' takes the IP from self.hostname1, which is only usable
        # when name checks were not skipped
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      # ('auto'/'generate' placeholders are resolved later in this method)
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      # only values present in the request end up in nicparams
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      # the filled dict is used for the syntax check only; the NIC object
      # stores the unfilled nicparams
      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      # the volume group defaults to the one returned by the configuration
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      # the meta VG and adoption keys are copied only when given
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # one entry per disk: the dump path below the source path, or False
      # when the export has no dump for that disk index
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      # when the instance keeps the exported name, NICs still set to
      # 'auto' take their MAC from the export
      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    # (only the primary, secondary and source nodes are kept; unset
    # entries are filtered out)
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    # all nodes the instance will live on: primary first, then secondaries
    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USAGE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    # the policy is the one computed for the primary node's group;
    # violations are fatal unless the opcode asks to ignore the policy
    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                  errors.ECODE_INVAL)

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = group_info.diskparams

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      # the set collapses duplicates, so a smaller set than the disk list
      # means the same "vg/lv" name was given more than once
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      # every LV to adopt must exist on the primary node
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # NOTE(review): item 2 of the per-LV data is treated as the "online"
      # flag, judging by the error message below - confirm against the RPC
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      # (item 0 of the per-LV data is used as the size)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      # only devices below the adoptable root may be adopted
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # the disk sizes are replaced by the sizes reported by the node; the
      # lookup uses the adoption path exactly as given in the disk dict
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    # (skipped when the instance is not going to be started)
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    # the dry-run result is the list of nodes the instance would use
    self.dry_run_result = list(nodenames)
9974
9975   def Exec(self, feedback_fn):
9976     """Create and add the instance to the cluster.
9977
9978     """
9979     instance = self.op.instance_name
9980     pnode_name = self.pnode.name
9981
9982     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9983                 self.owned_locks(locking.LEVEL_NODE)), \
9984       "Node locks differ from node resource locks"
9985
9986     ht_kind = self.op.hypervisor
9987     if ht_kind in constants.HTS_REQ_PORT:
9988       network_port = self.cfg.AllocatePort()
9989     else:
9990       network_port = None
9991
9992     disks = _GenerateDiskTemplate(self,
9993                                   self.op.disk_template,
9994                                   instance, pnode_name,
9995                                   self.secondaries,
9996                                   self.disks,
9997                                   self.instance_file_storage_dir,
9998                                   self.op.file_driver,
9999                                   0,
10000                                   feedback_fn,
10001                                   self.diskparams)
10002
10003     iobj = objects.Instance(name=instance, os=self.op.os_type,
10004                             primary_node=pnode_name,
10005                             nics=self.nics, disks=disks,
10006                             disk_template=self.op.disk_template,
10007                             admin_state=constants.ADMINST_DOWN,
10008                             network_port=network_port,
10009                             beparams=self.op.beparams,
10010                             hvparams=self.op.hvparams,
10011                             hypervisor=self.op.hypervisor,
10012                             osparams=self.op.osparams,
10013                             )
10014
10015     if self.op.tags:
10016       for tag in self.op.tags:
10017         iobj.AddTag(tag)
10018
10019     if self.adopt_disks:
10020       if self.op.disk_template == constants.DT_PLAIN:
10021         # rename LVs to the newly-generated names; we need to construct
10022         # 'fake' LV disks with the old data, plus the new unique_id
10023         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10024         rename_to = []
10025         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10026           rename_to.append(t_dsk.logical_id)
10027           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10028           self.cfg.SetDiskID(t_dsk, pnode_name)
10029         result = self.rpc.call_blockdev_rename(pnode_name,
10030                                                zip(tmp_disks, rename_to))
10031         result.Raise("Failed to rename adoped LVs")
10032     else:
10033       feedback_fn("* creating instance disks...")
10034       try:
10035         _CreateDisks(self, iobj)
10036       except errors.OpExecError:
10037         self.LogWarning("Device creation failed, reverting...")
10038         try:
10039           _RemoveDisks(self, iobj)
10040         finally:
10041           self.cfg.ReleaseDRBDMinors(instance)
10042           raise
10043
10044     feedback_fn("adding instance %s to cluster config" % instance)
10045
10046     self.cfg.AddInstance(iobj, self.proc.GetECId())
10047
10048     # Declare that we don't want to remove the instance lock anymore, as we've
10049     # added the instance to the config
10050     del self.remove_locks[locking.LEVEL_INSTANCE]
10051
10052     if self.op.mode == constants.INSTANCE_IMPORT:
10053       # Release unused nodes
10054       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10055     else:
10056       # Release all nodes
10057       _ReleaseLocks(self, locking.LEVEL_NODE)
10058
10059     disk_abort = False
10060     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10061       feedback_fn("* wiping instance disks...")
10062       try:
10063         _WipeDisks(self, iobj)
10064       except errors.OpExecError, err:
10065         logging.exception("Wiping disks failed")
10066         self.LogWarning("Wiping instance disks failed (%s)", err)
10067         disk_abort = True
10068
10069     if disk_abort:
10070       # Something is already wrong with the disks, don't do anything else
10071       pass
10072     elif self.op.wait_for_sync:
10073       disk_abort = not _WaitForSync(self, iobj)
10074     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10075       # make sure the disks are not degraded (still sync-ing is ok)
10076       feedback_fn("* checking mirrors status")
10077       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10078     else:
10079       disk_abort = False
10080
10081     if disk_abort:
10082       _RemoveDisks(self, iobj)
10083       self.cfg.RemoveInstance(iobj.name)
10084       # Make sure the instance lock gets removed
10085       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10086       raise errors.OpExecError("There are some degraded disks for"
10087                                " this instance")
10088
10089     # Release all node resource locks
10090     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10091
10092     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10093       if self.op.mode == constants.INSTANCE_CREATE:
10094         if not self.op.no_install:
10095           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10096                         not self.op.wait_for_sync)
10097           if pause_sync:
10098             feedback_fn("* pausing disk sync to install instance OS")
10099             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10100                                                               iobj.disks, True)
10101             for idx, success in enumerate(result.payload):
10102               if not success:
10103                 logging.warn("pause-sync of instance %s for disk %d failed",
10104                              instance, idx)
10105
10106           feedback_fn("* running the instance OS create scripts...")
10107           # FIXME: pass debug option from opcode to backend
10108           os_add_result = \
10109             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10110                                           self.op.debug_level)
10111           if pause_sync:
10112             feedback_fn("* resuming disk sync")
10113             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10114                                                               iobj.disks, False)
10115             for idx, success in enumerate(result.payload):
10116               if not success:
10117                 logging.warn("resume-sync of instance %s for disk %d failed",
10118                              instance, idx)
10119
10120           os_add_result.Raise("Could not add os for instance %s"
10121                               " on node %s" % (instance, pnode_name))
10122
10123       elif self.op.mode == constants.INSTANCE_IMPORT:
10124         feedback_fn("* running the instance OS import scripts...")
10125
10126         transfers = []
10127
10128         for idx, image in enumerate(self.src_images):
10129           if not image:
10130             continue
10131
10132           # FIXME: pass debug option from opcode to backend
10133           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10134                                              constants.IEIO_FILE, (image, ),
10135                                              constants.IEIO_SCRIPT,
10136                                              (iobj.disks[idx], idx),
10137                                              None)
10138           transfers.append(dt)
10139
10140         import_result = \
10141           masterd.instance.TransferInstanceData(self, feedback_fn,
10142                                                 self.op.src_node, pnode_name,
10143                                                 self.pnode.secondary_ip,
10144                                                 iobj, transfers)
10145         if not compat.all(import_result):
10146           self.LogWarning("Some disks for instance %s on node %s were not"
10147                           " imported successfully" % (instance, pnode_name))
10148
10149       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10150         feedback_fn("* preparing remote import...")
10151         # The source cluster will stop the instance before attempting to make a
10152         # connection. In some cases stopping an instance can take a long time,
10153         # hence the shutdown timeout is added to the connection timeout.
10154         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10155                            self.op.source_shutdown_timeout)
10156         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10157
10158         assert iobj.primary_node == self.pnode.name
10159         disk_results = \
10160           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10161                                         self.source_x509_ca,
10162                                         self._cds, timeouts)
10163         if not compat.all(disk_results):
10164           # TODO: Should the instance still be started, even if some disks
10165           # failed to import (valid for local imports, too)?
10166           self.LogWarning("Some disks for instance %s on node %s were not"
10167                           " imported successfully" % (instance, pnode_name))
10168
10169         # Run rename script on newly imported instance
10170         assert iobj.name == instance
10171         feedback_fn("Running rename script for %s" % instance)
10172         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10173                                                    self.source_instance_name,
10174                                                    self.op.debug_level)
10175         if result.fail_msg:
10176           self.LogWarning("Failed to run rename script for %s on node"
10177                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10178
10179       else:
10180         # also checked in the prereq part
10181         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10182                                      % self.op.mode)
10183
10184     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10185
10186     if self.op.start:
10187       iobj.admin_state = constants.ADMINST_UP
10188       self.cfg.Update(iobj, feedback_fn)
10189       logging.info("Starting instance %s on node %s", instance, pnode_name)
10190       feedback_fn("* starting instance...")
10191       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10192                                             False)
10193       result.Raise("Could not start instance")
10194
10195     return list(iobj.all_nodes)
10196
10197
10198 def _CheckRADOSFreeSpace():
10199   """Compute disk size requirements inside the RADOS cluster.
10200
10201   """
10202   # For the RADOS cluster we assume there is always enough space.
10203   pass
10204
10205
class LUInstanceConsole(NoHooksLU):
  """Provide access to the console of an instance.

  This LU is somewhat special: rather than opening the console itself, it
  returns what is needed (the command line to run on the master node) in
  order to connect to it.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set the lock sharing from L{_ShareAll} and lock the instance.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance in the configuration, stores it in
    C{self.instance} and runs L{_CheckNodeOnline} on its primary node.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    primary = self.instance.primary_node
    _CheckNodeOnline(self, primary)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @param feedback_fn: feedback function (not used here)
    @return: the result of L{_GetInstanceConsole} for the instance
    @raise errors.OpExecError: if the instance is not among the instances
      reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # Ask the primary node which instances of this hypervisor it knows about
    listing = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    listing.Raise("Can't get node information from %s" % pnode)

    if inst.name in listing.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not reported by the node: name the state after the configured admin
    # state so the error message tells the user why there is no console
    if inst.admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif inst.admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
10255
10256
def _GetInstanceConsole(cluster, instance):
  """Build the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the hypervisor and backend
    parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is wanted
  @rtype: dict
  @return: the console object returned by the hypervisor, converted via its
    C{ToDict} method

  """
  # The filled hvparams and beparams are handed over as separate arguments
  # instead of being written into the instance, so that the defaults are
  # never saved in the instance itself.
  console = hypervisor.GetHypervisor(instance.hypervisor).GetInstanceConsole(
    instance, cluster.FillHV(instance), cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10276
10277
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  Argument checking, lock declaration and hooks are handled here; the
  replacement itself is delegated to a L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the combination of mode, remote node and iallocator.

    """
    opcode = self.op
    TLReplaceDisks.CheckArguments(opcode.mode, opcode.remote_node,
                                  opcode.iallocator)

  def ExpandNames(self):
    """Lock the instance, prepare the node-level locks, create the tasklet.

    """
    self._ExpandAndLockInstance()

    # None of the node-related levels may have been set up at this point
    for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                       locking.LEVEL_NODEGROUP):
      assert lock_level not in self.needed_locks

    opcode = self.op

    assert opcode.iallocator is None or opcode.remote_node is None, \
      "Conflicting options"

    if opcode.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if opcode.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      opcode.remote_node = _ExpandNodeName(self.cfg, opcode.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [opcode.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, opcode.instance_name, opcode.mode,
                                   opcode.iallocator, opcode.remote_node,
                                   opcode.disks, False, opcode.early_release,
                                   opcode.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: locking level being processed

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks (the very same list object is shared by both levels)
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @rtype: dict
    @return: the mode and the new/old secondary, plus the entries provided
      by L{_BuildInstanceHookEnvByObject} for the instance

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair containing the same node list twice: the master node,
      the instance's primary node and, if given, the new secondary

    """
    instance = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the node group locks, if any are held (they were acquired
    before the nodes were locked), and then runs the generic
    L{LogicalUnit.CheckPrereq}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
10395
10396
10397 class TLReplaceDisks(Tasklet):
10398   """Replaces disks for an instance.
10399
10400   Note: Locking is not within the scope of this class.
10401
10402   """
10403   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10404                disks, delay_iallocator, early_release, ignore_ipolicy):
10405     """Initializes this class.
10406
10407     """
10408     Tasklet.__init__(self, lu)
10409
10410     # Parameters
10411     self.instance_name = instance_name
10412     self.mode = mode
10413     self.iallocator_name = iallocator_name
10414     self.remote_node = remote_node
10415     self.disks = disks
10416     self.delay_iallocator = delay_iallocator
10417     self.early_release = early_release
10418     self.ignore_ipolicy = ignore_ipolicy
10419
10420     # Runtime data
10421     self.instance = None
10422     self.new_node = None
10423     self.target_node = None
10424     self.other_node = None
10425     self.remote_node_info = None
10426     self.node_secondary_ip = None
10427
10428   @staticmethod
10429   def CheckArguments(mode, remote_node, iallocator):
10430     """Helper function for users of this class.
10431
10432     """
10433     # check for valid parameter combination
10434     if mode == constants.REPLACE_DISK_CHG:
10435       if remote_node is None and iallocator is None:
10436         raise errors.OpPrereqError("When changing the secondary either an"
10437                                    " iallocator script must be used or the"
10438                                    " new node given", errors.ECODE_INVAL)
10439
10440       if remote_node is not None and iallocator is not None:
10441         raise errors.OpPrereqError("Give either the iallocator or the new"
10442                                    " secondary, not both", errors.ECODE_INVAL)
10443
10444     elif remote_node is not None or iallocator is not None:
10445       # Not replacing the secondary
10446       raise errors.OpPrereqError("The iallocator and new node options can"
10447                                  " only be used when changing the"
10448                                  " secondary node", errors.ECODE_INVAL)
10449
10450   @staticmethod
10451   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10452     """Compute a new secondary node using an IAllocator.
10453
10454     """
10455     ial = IAllocator(lu.cfg, lu.rpc,
10456                      mode=constants.IALLOCATOR_MODE_RELOC,
10457                      name=instance_name,
10458                      relocate_from=list(relocate_from))
10459
10460     ial.Run(iallocator_name)
10461
10462     if not ial.success:
10463       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10464                                  " %s" % (iallocator_name, ial.info),
10465                                  errors.ECODE_NORES)
10466
10467     if len(ial.result) != ial.required_nodes:
10468       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10469                                  " of nodes (%s), required %s" %
10470                                  (iallocator_name,
10471                                   len(ial.result), ial.required_nodes),
10472                                  errors.ECODE_FAULT)
10473
10474     remote_node_name = ial.result[0]
10475
10476     lu.LogInfo("Selected new secondary for instance '%s': %s",
10477                instance_name, remote_node_name)
10478
10479     return remote_node_name
10480
10481   def _FindFaultyDisks(self, node_name):
10482     """Wrapper for L{_FindFaultyInstanceDisks}.
10483
10484     """
10485     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10486                                     node_name, True)
10487
10488   def _CheckDisksActivated(self, instance):
10489     """Checks if the instance disks are activated.
10490
10491     @param instance: The instance to check disks
10492     @return: True if they are activated, False otherwise
10493
10494     """
10495     nodes = instance.all_nodes
10496
10497     for idx, dev in enumerate(instance.disks):
10498       for node in nodes:
10499         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10500         self.cfg.SetDiskID(dev, node)
10501
10502         result = self.rpc.call_blockdev_find(node, dev)
10503
10504         if result.offline:
10505           continue
10506         elif result.fail_msg or not result.payload:
10507           return False
10508
10509     return True
10510
10511   def CheckPrereq(self):
10512     """Check prerequisites.
10513
10514     This checks that the instance is in the cluster.
10515
10516     """
10517     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10518     assert instance is not None, \
10519       "Cannot retrieve locked instance %s" % self.instance_name
10520
10521     if instance.disk_template != constants.DT_DRBD8:
10522       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10523                                  " instances", errors.ECODE_INVAL)
10524
10525     if len(instance.secondary_nodes) != 1:
10526       raise errors.OpPrereqError("The instance has a strange layout,"
10527                                  " expected one secondary but found %d" %
10528                                  len(instance.secondary_nodes),
10529                                  errors.ECODE_FAULT)
10530
10531     if not self.delay_iallocator:
10532       self._CheckPrereq2()
10533
10534   def _CheckPrereq2(self):
10535     """Check prerequisites, second part.
10536
10537     This function should always be part of CheckPrereq. It was separated and is
10538     now called from Exec because during node evacuation iallocator was only
10539     called with an unmodified cluster model, not taking planned changes into
10540     account.
10541
10542     """
10543     instance = self.instance
10544     secondary_node = instance.secondary_nodes[0]
10545
10546     if self.iallocator_name is None:
10547       remote_node = self.remote_node
10548     else:
10549       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10550                                        instance.name, instance.secondary_nodes)
10551
10552     if remote_node is None:
10553       self.remote_node_info = None
10554     else:
10555       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10556              "Remote node '%s' is not locked" % remote_node
10557
10558       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10559       assert self.remote_node_info is not None, \
10560         "Cannot retrieve locked node %s" % remote_node
10561
10562     if remote_node == self.instance.primary_node:
10563       raise errors.OpPrereqError("The specified node is the primary node of"
10564                                  " the instance", errors.ECODE_INVAL)
10565
10566     if remote_node == secondary_node:
10567       raise errors.OpPrereqError("The specified node is already the"
10568                                  " secondary node of the instance",
10569                                  errors.ECODE_INVAL)
10570
10571     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10572                                     constants.REPLACE_DISK_CHG):
10573       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10574                                  errors.ECODE_INVAL)
10575
10576     if self.mode == constants.REPLACE_DISK_AUTO:
10577       if not self._CheckDisksActivated(instance):
10578         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10579                                    " first" % self.instance_name,
10580                                    errors.ECODE_STATE)
10581       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10582       faulty_secondary = self._FindFaultyDisks(secondary_node)
10583
10584       if faulty_primary and faulty_secondary:
10585         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10586                                    " one node and can not be repaired"
10587                                    " automatically" % self.instance_name,
10588                                    errors.ECODE_STATE)
10589
10590       if faulty_primary:
10591         self.disks = faulty_primary
10592         self.target_node = instance.primary_node
10593         self.other_node = secondary_node
10594         check_nodes = [self.target_node, self.other_node]
10595       elif faulty_secondary:
10596         self.disks = faulty_secondary
10597         self.target_node = secondary_node
10598         self.other_node = instance.primary_node
10599         check_nodes = [self.target_node, self.other_node]
10600       else:
10601         self.disks = []
10602         check_nodes = []
10603
10604     else:
10605       # Non-automatic modes
10606       if self.mode == constants.REPLACE_DISK_PRI:
10607         self.target_node = instance.primary_node
10608         self.other_node = secondary_node
10609         check_nodes = [self.target_node, self.other_node]
10610
10611       elif self.mode == constants.REPLACE_DISK_SEC:
10612         self.target_node = secondary_node
10613         self.other_node = instance.primary_node
10614         check_nodes = [self.target_node, self.other_node]
10615
10616       elif self.mode == constants.REPLACE_DISK_CHG:
10617         self.new_node = remote_node
10618         self.other_node = instance.primary_node
10619         self.target_node = secondary_node
10620         check_nodes = [self.new_node, self.other_node]
10621
10622         _CheckNodeNotDrained(self.lu, remote_node)
10623         _CheckNodeVmCapable(self.lu, remote_node)
10624
10625         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10626         assert old_node_info is not None
10627         if old_node_info.offline and not self.early_release:
10628           # doesn't make sense to delay the release
10629           self.early_release = True
10630           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10631                           " early-release mode", secondary_node)
10632
10633       else:
10634         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10635                                      self.mode)
10636
10637       # If not specified all disks should be replaced
10638       if not self.disks:
10639         self.disks = range(len(self.instance.disks))
10640
10641     # TODO: This is ugly, but right now we can't distinguish between internal
10642     # submitted opcode and external one. We should fix that.
10643     if self.remote_node_info:
10644       # We change the node, lets verify it still meets instance policy
10645       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10646       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10647                                        new_group_info)
10648       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10649                               ignore=self.ignore_ipolicy)
10650
10651     # TODO: compute disk parameters
10652     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10653     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10654     if primary_node_info.group != secondary_node_info.group:
10655       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10656                       " different node groups; the disk parameters of the"
10657                       " primary node's group will be applied.")
10658
10659     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10660
10661     for node in check_nodes:
10662       _CheckNodeOnline(self.lu, node)
10663
10664     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10665                                                           self.other_node,
10666                                                           self.target_node]
10667                               if node_name is not None)
10668
10669     # Release unneeded node and node resource locks
10670     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10671     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10672
10673     # Release any owned node group
10674     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10675       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10676
10677     # Check whether disks are valid
10678     for disk_idx in self.disks:
10679       instance.FindDisk(disk_idx)
10680
10681     # Get secondary node IP addresses
10682     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10683                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10684
10685   def Exec(self, feedback_fn):
10686     """Execute disk replacement.
10687
10688     This dispatches the disk replacement to the appropriate handler.
10689
10690     """
10691     if self.delay_iallocator:
10692       self._CheckPrereq2()
10693
10694     if __debug__:
10695       # Verify owned locks before starting operation
10696       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10697       assert set(owned_nodes) == set(self.node_secondary_ip), \
10698           ("Incorrect node locks, owning %s, expected %s" %
10699            (owned_nodes, self.node_secondary_ip.keys()))
10700       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10701               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10702
10703       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10704       assert list(owned_instances) == [self.instance_name], \
10705           "Instance '%s' not locked" % self.instance_name
10706
10707       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10708           "Should not own any node group lock at this point"
10709
10710     if not self.disks:
10711       feedback_fn("No disks need replacement")
10712       return
10713
10714     feedback_fn("Replacing disk(s) %s for %s" %
10715                 (utils.CommaJoin(self.disks), self.instance.name))
10716
10717     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10718
10719     # Activate the instance disks if we're replacing them on a down instance
10720     if activate_disks:
10721       _StartInstanceDisks(self.lu, self.instance, True)
10722
10723     try:
10724       # Should we replace the secondary node?
10725       if self.new_node is not None:
10726         fn = self._ExecDrbd8Secondary
10727       else:
10728         fn = self._ExecDrbd8DiskOnly
10729
10730       result = fn(feedback_fn)
10731     finally:
10732       # Deactivate the instance disks if we're replacing them on a
10733       # down instance
10734       if activate_disks:
10735         _SafeShutdownInstanceDisks(self.lu, self.instance)
10736
10737     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10738
10739     if __debug__:
10740       # Verify owned locks
10741       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10742       nodes = frozenset(self.node_secondary_ip)
10743       assert ((self.early_release and not owned_nodes) or
10744               (not self.early_release and not (set(owned_nodes) - nodes))), \
10745         ("Not owning the correct locks, early_release=%s, owned=%r,"
10746          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10747
10748     return result
10749
10750   def _CheckVolumeGroup(self, nodes):
10751     self.lu.LogInfo("Checking volume groups")
10752
10753     vgname = self.cfg.GetVGName()
10754
10755     # Make sure volume group exists on all involved nodes
10756     results = self.rpc.call_vg_list(nodes)
10757     if not results:
10758       raise errors.OpExecError("Can't list volume groups on the nodes")
10759
10760     for node in nodes:
10761       res = results[node]
10762       res.Raise("Error checking node %s" % node)
10763       if vgname not in res.payload:
10764         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10765                                  (vgname, node))
10766
10767   def _CheckDisksExistence(self, nodes):
10768     # Check disk existence
10769     for idx, dev in enumerate(self.instance.disks):
10770       if idx not in self.disks:
10771         continue
10772
10773       for node in nodes:
10774         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10775         self.cfg.SetDiskID(dev, node)
10776
10777         result = self.rpc.call_blockdev_find(node, dev)
10778
10779         msg = result.fail_msg
10780         if msg or not result.payload:
10781           if not msg:
10782             msg = "disk not found"
10783           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10784                                    (idx, node, msg))
10785
10786   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10787     for idx, dev in enumerate(self.instance.disks):
10788       if idx not in self.disks:
10789         continue
10790
10791       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10792                       (idx, node_name))
10793
10794       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10795                                    ldisk=ldisk):
10796         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10797                                  " replace disks for instance %s" %
10798                                  (node_name, self.instance.name))
10799
def _CreateNewStorage(self, node_name):
  """Allocate replacement LVs on a node for every selected disk.

  For each instance disk whose index is listed in C{self.disks}, a new
  data/meta LV pair is created on C{node_name}, using names obtained
  from L{_GenerateUniqueNames}. The existing disk objects are not
  modified (only copies of their current children are recorded), as
  this is used for same-node replaces and not for changing the
  secondary node.

  @param node_name: node on which the new LVs are created
  @return: dict mapping each disk's C{iv_name} to a tuple of
      (device, copies of its old LVs, newly created LVs)

  """
  replacements = {}

  for disk_idx, drbd_dev in enumerate(self.instance.disks):
    if disk_idx not in self.disks:
      # This disk was not selected for replacement
      continue

    self.lu.LogInfo("Adding storage on %s for disk/%d" %
                    (node_name, disk_idx))

    self.cfg.SetDiskID(drbd_dev, node_name)

    wanted_names = [".disk%d_%s" % (disk_idx, kind)
                    for kind in ["data", "meta"]]
    unique_names = _GenerateUniqueNames(self.lu, wanted_names)

    (_, data_params, meta_params) = \
      _ComputeLDParams(constants.DT_DRBD8, self.diskparams)

    # The new LVs are placed in the same volume groups as the LVs they
    # replace (first element of the children's logical_id)
    data_vg = drbd_dev.children[0].logical_id[0]
    new_data = objects.Disk(dev_type=constants.LD_LV,
                            size=drbd_dev.size,
                            logical_id=(data_vg, unique_names[0]),
                            params=data_params)
    meta_vg = drbd_dev.children[1].logical_id[0]
    new_meta = objects.Disk(dev_type=constants.LD_LV,
                            size=DRBD_META_SIZE,
                            logical_id=(meta_vg, unique_names[1]),
                            params=meta_params)

    created_lvs = [new_data, new_meta]
    previous_lvs = [child.Copy() for child in drbd_dev.children]
    replacements[drbd_dev.iv_name] = (drbd_dev, previous_lvs, created_lvs)

    # force_create=True (fifth argument) forces the LVM creation
    for lv in created_lvs:
      _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                      _GetInstanceInfoText(self.instance), False)

  return replacements
10839
10840   def _CheckDevices(self, node_name, iv_names):
10841     for name, (dev, _, _) in iv_names.iteritems():
10842       self.cfg.SetDiskID(dev, node_name)
10843
10844       result = self.rpc.call_blockdev_find(node_name, dev)
10845
10846       msg = result.fail_msg
10847       if msg or not result.payload:
10848         if not msg:
10849           msg = "disk not found"
10850         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10851                                  (name, msg))
10852
10853       if result.payload.is_degraded:
10854         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10855
10856   def _RemoveOldStorage(self, node_name, iv_names):
10857     for name, (_, old_lvs, _) in iv_names.iteritems():
10858       self.lu.LogInfo("Remove logical volumes for %s" % name)
10859
10860       for lv in old_lvs:
10861         self.cfg.SetDiskID(lv, node_name)
10862
10863         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10864         if msg:
10865           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10866                              hint="remove unused LVs manually")
10867
def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
  """Replace a disk on the primary or secondary for DRBD 8.

  The algorithm for replace is quite complicated:

    1. for each disk to be replaced:

      1. create new LVs on the target node with unique names
      1. detach old LVs from the drbd device
      1. rename old LVs to name_replaced-<time_t>
      1. rename new LVs to old LVs
      1. attach the new LVs (with the old names now) to the drbd device

    1. wait for sync across all devices

    1. for each modified disk:

      1. remove old LVs (which have the name name_replaced-<time_t>)

  If C{self.early_release} is set, the old LVs are removed before
  waiting for the sync instead of after it.

  Failures are not very well handled.

  @param feedback_fn: not used by this method
  @raise errors.OpExecError: if attaching the new LVs to the drbd device
      fails (the new LVs are then removed on a best-effort basis); the
      RPC result checks and the helper methods called here may raise
      as well

  """
  steps_total = 6

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.other_node, self.target_node])
  self._CheckVolumeGroup([self.target_node, self.other_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.other_node,
                              self.other_node == self.instance.primary_node,
                              False)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  iv_names = self._CreateNewStorage(self.target_node)

  # Step: for each lv, detach+rename*2+attach
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  for dev, old_lvs, new_lvs in iv_names.itervalues():
    self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

    result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                   old_lvs)
    result.Raise("Can't detach drbd from local storage on node"
                 " %s for device %s" % (self.target_node, dev.iv_name))
    #dev.children = []
    #cfg.Update(instance)

    # ok, we created the new LVs, so now we know we have the needed
    # storage; as such, we proceed on the target node to rename
    # old_lv to _old, and new_lv to old_lv; note that we rename LVs
    # using the assumption that logical_id == physical_id (which in
    # turn is the unique_id on that node)

    # FIXME(iustin): use a better name for the replaced LVs
    temp_suffix = int(time.time())
    ren_fn = lambda d, suff: (d.physical_id[0],
                              d.physical_id[1] + "_replaced-%s" % suff)

    # Build the rename list based on what LVs exist on the node
    rename_old_to_new = []
    for to_ren in old_lvs:
      result = self.rpc.call_blockdev_find(self.target_node, to_ren)
      if not result.fail_msg and result.payload:
        # device exists
        rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

    self.lu.LogInfo("Renaming the old LVs on the target node")
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_old_to_new)
    result.Raise("Can't rename old LVs on node %s" % self.target_node)

    # Now we rename the new LVs to the old LVs
    self.lu.LogInfo("Renaming the new LVs on the target node")
    rename_new_to_old = [(new, old.physical_id)
                         for old, new in zip(old_lvs, new_lvs)]
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_new_to_old)
    result.Raise("Can't rename new LVs on node %s" % self.target_node)

    # Intermediate steps of in memory modifications
    for old, new in zip(old_lvs, new_lvs):
      new.logical_id = old.logical_id
      self.cfg.SetDiskID(new, self.target_node)

    # We need to modify old_lvs so that removal later removes the
    # right LVs, not the newly added ones; note that old_lvs is a
    # copy here
    for disk in old_lvs:
      disk.logical_id = ren_fn(disk, temp_suffix)
      self.cfg.SetDiskID(disk, self.target_node)

    # Now that the new lvs have the old name, we can add them to the device
    self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
    result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                new_lvs)
    msg = result.fail_msg
    if msg:
      # Attaching failed: try to remove the new LVs before aborting
      for new_lv in new_lvs:
        msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                             new_lv).fail_msg
        if msg2:
          self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                             hint=("cleanup manually the unused logical"
                                   " volumes"))
      raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

  # Steps 5 and 6 are "remove old storage" and "sync devices"; their
  # order depends on early_release, hence the counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # Release all node locks while waiting for sync
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
11009
def _ExecDrbd8Secondary(self, feedback_fn):
  """Replace the secondary node for DRBD 8.

  The algorithm for replace is quite complicated:
    - for all disks of the instance:
      - create new LVs on the new node with same names
      - shutdown the drbd device on the old secondary
      - disconnect the drbd network on the primary
      - create the drbd device on the new secondary
      - network attach the drbd on the primary, using an artifice:
        the drbd code for Attach() will connect to the network if it
        finds a device which is connected to the good local disks but
        not network enabled
    - wait for sync across all devices
    - remove all disks from the old secondary

  If C{self.early_release} is set, the old storage is removed before
  waiting for the sync instead of after it.

  Failures are not very well handled.

  @param feedback_fn: passed on to the configuration update
  @raise errors.OpExecError: if the primary's drbd devices can't be
      detached from the network (the newly allocated minors are
      released first); the helper functions called here may raise as
      well

  """
  steps_total = 6

  pnode = self.instance.primary_node

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.instance.primary_node])
  self._CheckVolumeGroup([self.instance.primary_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.instance.primary_node, True, True)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                    (self.new_node, idx))
    # we pass force_create=True to force LVM creation
    for new_lv in dev.children:
      _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                      _GetInstanceInfoText(self.instance), False)

  # Step 4: drbd minors and drbd setups changes
  # after this, we must manually remove the drbd minors on both the
  # error and the success paths
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  minors = self.cfg.AllocateDRBDMinor([self.new_node
                                       for dev in self.instance.disks],
                                      self.instance.name)
  logging.debug("Allocated minors %r", minors)

  iv_names = {}
  for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
    self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                    (self.new_node, idx))
    # create new devices on new_node; note that we create two IDs:
    # one without port, so the drbd will be activated without
    # networking information on the new node at this stage, and one
    # with network, for the later activation (the network attach below)
    (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
    if self.instance.primary_node == o_node1:
      p_minor = o_minor1
    else:
      assert self.instance.primary_node == o_node2, "Three-node instance?"
      p_minor = o_minor2

    new_alone_id = (self.instance.primary_node, self.new_node, None,
                    p_minor, new_minor, o_secret)
    new_net_id = (self.instance.primary_node, self.new_node, o_port,
                  p_minor, new_minor, o_secret)

    iv_names[idx] = (dev, dev.children, new_net_id)
    logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                  new_net_id)
    drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
    new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                            logical_id=new_alone_id,
                            children=dev.children,
                            size=dev.size,
                            params=drbd_params)
    try:
      _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                            _GetInstanceInfoText(self.instance), False)
    except errors.GenericError:
      # Don't leak the minors reserved above if the device can't be created
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise

  # We have new devices, shutdown the drbd on the old secondary
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
    self.cfg.SetDiskID(dev, self.target_node)
    msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
    if msg:
      self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                         " node: %s" % (idx, msg),
                         hint=("Please cleanup this device manually as"
                               " soon as possible"))

  self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
  result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                             self.instance.disks)[pnode]

  msg = result.fail_msg
  if msg:
    # detaches didn't succeed (unlikely)
    self.cfg.ReleaseDRBDMinors(self.instance.name)
    raise errors.OpExecError("Can't detach the disks from the network on"
                             " old node: %s" % (msg,))

  # if we managed to detach at least one, we update all the disks of
  # the instance to point to the new secondary
  self.lu.LogInfo("Updating instance configuration")
  for dev, _, new_logical_id in iv_names.itervalues():
    dev.logical_id = new_logical_id
    self.cfg.SetDiskID(dev, self.instance.primary_node)

  self.cfg.Update(self.instance, feedback_fn)

  # Release all node locks (the configuration has been updated)
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # and now perform the drbd attach
  self.lu.LogInfo("Attaching primary drbds to new secondary"
                  " (standalone => connected)")
  result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                          self.new_node],
                                         self.node_secondary_ip,
                                         self.instance.disks,
                                         self.instance.name,
                                         False)
  for to_node, to_result in result.items():
    msg = to_result.fail_msg
    if msg:
      self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                         to_node, msg,
                         hint=("please do a gnt-instance info to see the"
                               " status of disks"))

  # Steps 5 and 6 are "remove old storage" and "sync devices"; their
  # order depends on early_release, hence the counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
11176
11177
11178 class LURepairNodeStorage(NoHooksLU):
11179   """Repairs the volume group on a node.
11180
11181   """
11182   REQ_BGL = False
11183
11184   def CheckArguments(self):
11185     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11186
11187     storage_type = self.op.storage_type
11188
11189     if (constants.SO_FIX_CONSISTENCY not in
11190         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11191       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11192                                  " repaired" % storage_type,
11193                                  errors.ECODE_INVAL)
11194
11195   def ExpandNames(self):
11196     self.needed_locks = {
11197       locking.LEVEL_NODE: [self.op.node_name],
11198       }
11199
11200   def _CheckFaultyDisks(self, instance, node_name):
11201     """Ensure faulty disks abort the opcode or at least warn."""
11202     try:
11203       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11204                                   node_name, True):
11205         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11206                                    " node '%s'" % (instance.name, node_name),
11207                                    errors.ECODE_STATE)
11208     except errors.OpPrereqError, err:
11209       if self.op.ignore_consistency:
11210         self.proc.LogWarning(str(err.args[0]))
11211       else:
11212         raise
11213
11214   def CheckPrereq(self):
11215     """Check prerequisites.
11216
11217     """
11218     # Check whether any instance on this node has faulty disks
11219     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11220       if inst.admin_state != constants.ADMINST_UP:
11221         continue
11222       check_nodes = set(inst.all_nodes)
11223       check_nodes.discard(self.op.node_name)
11224       for inst_node_name in check_nodes:
11225         self._CheckFaultyDisks(inst, inst_node_name)
11226
11227   def Exec(self, feedback_fn):
11228     feedback_fn("Repairing storage unit '%s' on %s ..." %
11229                 (self.op.name, self.op.node_name))
11230
11231     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11232     result = self.rpc.call_storage_execute(self.op.node_name,
11233                                            self.op.storage_type, st_args,
11234                                            self.op.name,
11235                                            constants.SO_FIX_CONSISTENCY)
11236     result.Raise("Failed to repair storage unit '%s' on %s" %
11237                  (self.op.name, self.op.node_name))
11238
11239
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  The evacuation itself is not performed here; L{Exec} returns the jobs
  which will do it, either computed by an iallocator or built as
  replace-disks opcodes targeting the given remote node.

  """
  REQ_BGL = False

  # Translation from the opcode's evacuation mode to the iallocator's
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Check the iallocator/remote node arguments.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names, validate the remote node and declare locks.

    @raise errors.OpPrereqError: if the remote node is the evacuated
        node itself, or if a remote node is given for a mode other than
        secondary evacuation

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the given
        remote node or, if none was given, the members of the evacuated
        node's group(s)

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: the instances selected by the evacuation mode for the
        evacuated node
    @raise errors.OpPrereqError: if all instances (primary and
        secondary) are to be evacuated at once, which is not supported

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and the remote node.

    The nodes, node groups and instances are computed again and compared
    with the locks actually owned; any difference aborts the operation
    so that it can be retried.

    @raise errors.OpPrereqError: if the needed nodes are not all locked,
        or if the remote node is the primary node of an instance to be
        evacuated
    @raise errors.OpExecError: if the node groups or the instances
        changed since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Build the jobs evacuating the instances.

    @param feedback_fn: not used by this method
    @return: L{ResultWithJobs} wrapping one job (list of opcodes) per
        replace-disks operation, or the jobs computed by the iallocator
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    # Exactly one of iallocator and remote node must be given
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11437
11438
11439 def _SetOpEarlyRelease(early_release, op):
11440   """Sets C{early_release} flag on opcodes if available.
11441
11442   """
11443   try:
11444     op.early_release = early_release
11445   except AttributeError:
11446     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11447
11448   return op
11449
11450
11451 def _NodeEvacDest(use_nodes, group, nodes):
11452   """Returns group or nodes depending on caller's choice.
11453
11454   """
11455   if use_nodes:
11456     return utils.CommaJoin(nodes)
11457   else:
11458     return group
11459
11460
11461 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11462   """Unpacks the result of change-group and node-evacuate iallocator requests.
11463
11464   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11465   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11466
11467   @type lu: L{LogicalUnit}
11468   @param lu: Logical unit instance
11469   @type alloc_result: tuple/list
11470   @param alloc_result: Result from iallocator
11471   @type early_release: bool
11472   @param early_release: Whether to release locks early if possible
11473   @type use_nodes: bool
11474   @param use_nodes: Whether to display node names instead of groups
11475
11476   """
11477   (moved, failed, jobs) = alloc_result
11478
11479   if failed:
11480     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11481                                  for (name, reason) in failed)
11482     lu.LogWarning("Unable to evacuate instances %s", failreason)
11483     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11484
11485   if moved:
11486     lu.LogInfo("Instances to be moved: %s",
11487                utils.CommaJoin("%s (to %s)" %
11488                                (name, _NodeEvacDest(use_nodes, group, nodes))
11489                                for (name, group, nodes) in moved))
11490
11491   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11492               map(opcodes.OpCode.LoadOpCode, ops))
11493           for ops in jobs]
11494
11495
11496 class LUInstanceGrowDisk(LogicalUnit):
11497   """Grow a disk of an instance.
11498
11499   """
11500   HPATH = "disk-grow"
11501   HTYPE = constants.HTYPE_INSTANCE
11502   REQ_BGL = False
11503
def ExpandNames(self):
  """Lock the instance and prepare the node-level locks.

  The node and node resource locks start out empty and are marked to be
  recalculated in replace mode.

  """
  self._ExpandAndLockInstance()

  for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    self.needed_locks[level] = []
    self.recalculate_locks[level] = constants.LOCKS_REPLACE
11510
def DeclareLocks(self, level):
  """Fill in the node and node resource locks.

  @param level: locking level for which locks are being declared

  """
  if level == locking.LEVEL_NODE:
    self._LockInstancesNodes()
    return

  if level == locking.LEVEL_NODE_RES:
    # The resource locks are a copy of the node locks
    node_locks = self.needed_locks[locking.LEVEL_NODE]
    self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]
11518
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on the master, the primary and all the secondaries.

  @return: dict with the C{DISK} and C{AMOUNT} entries taken from the
      opcode, updated with the instance's hook environment

  """
  hook_env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
  instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
  hook_env.update(instance_env)
  return hook_env
11531
def BuildHooksNodes(self):
  """Build hooks nodes.

  @return: tuple holding the same list twice: the master node followed
      by all nodes of the instance

  """
  hook_nodes = [self.cfg.GetMasterNode()]
  hook_nodes.extend(self.instance.all_nodes)
  return (hook_nodes, hook_nodes)
11538
def CheckPrereq(self):
  """Check prerequisites.

  This checks that the instance is in the cluster, that all its nodes
  pass the online check and that its disk template can be grown; the
  instance and the disk to grow are stored on the LU. Except for the
  file, shared file and RBD templates, the free disk space on the
  nodes is checked as well.

  @raise errors.OpPrereqError: if the disk template doesn't support
      growing; the helper checks may raise as well

  """
  inst = self.cfg.GetInstanceInfo(self.op.instance_name)
  assert inst is not None, \
    "Cannot retrieve locked instance %s" % self.op.instance_name

  inst_nodes = list(inst.all_nodes)
  for node_name in inst_nodes:
    _CheckNodeOnline(self, node_name)

  self.instance = inst

  if inst.disk_template not in constants.DTS_GROWABLE:
    raise errors.OpPrereqError("Instance's disk layout does not support"
                               " growing", errors.ECODE_INVAL)

  self.disk = inst.FindDisk(self.op.disk)

  # TODO: check the free disk space for file, when that feature will be
  # supported
  no_space_check = (constants.DT_FILE,
                    constants.DT_SHARED_FILE,
                    constants.DT_RBD)
  if inst.disk_template not in no_space_check:
    growth = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, inst_nodes, growth)
11567
11568   def Exec(self, feedback_fn):
11569     """Execute disk grow.
11570
11571     """
11572     instance = self.instance
11573     disk = self.disk
11574
11575     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11576     assert (self.owned_locks(locking.LEVEL_NODE) ==
11577             self.owned_locks(locking.LEVEL_NODE_RES))
11578
11579     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11580     if not disks_ok:
11581       raise errors.OpExecError("Cannot activate block device to grow")
11582
11583     feedback_fn("Growing disk %s of instance '%s' by %s" %
11584                 (self.op.disk, instance.name,
11585                  utils.FormatUnit(self.op.amount, "h")))
11586
11587     # First run all grow ops in dry-run mode
11588     for node in instance.all_nodes:
11589       self.cfg.SetDiskID(disk, node)
11590       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11591       result.Raise("Grow request failed to node %s" % node)
11592
11593     # We know that (as far as we can test) operations across different
11594     # nodes will succeed, time to run it for real
11595     for node in instance.all_nodes:
11596       self.cfg.SetDiskID(disk, node)
11597       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11598       result.Raise("Grow request failed to node %s" % node)
11599
11600       # TODO: Rewrite code to work properly
11601       # DRBD goes into sync mode for a short amount of time after executing the
11602       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11603       # calling "resize" in sync mode fails. Sleeping for a short amount of
11604       # time is a work-around.
11605       time.sleep(5)
11606
11607     disk.RecordGrow(self.op.amount)
11608     self.cfg.Update(instance, feedback_fn)
11609
11610     # Changes have been recorded, release node lock
11611     _ReleaseLocks(self, locking.LEVEL_NODE)
11612
11613     # Downgrade lock while waiting for sync
11614     self.glm.downgrade(locking.LEVEL_INSTANCE)
11615
11616     if self.op.wait_for_sync:
11617       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11618       if disk_abort:
11619         self.proc.LogWarning("Disk sync-ing has not returned a good"
11620                              " status; please check the instance")
11621       if instance.admin_state != constants.ADMINST_UP:
11622         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11623     elif instance.admin_state != constants.ADMINST_UP:
11624       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11625                            " not supposed to be running because no wait for"
11626                            " sync mode was requested")
11627
11628     assert self.owned_locks(locking.LEVEL_NODE_RES)
11629     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11630
11631
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determines the wanted instances and declares the needed locks.

    """
    self.needed_locks = {}

    # Non-static information can only be gathered while holding locks
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.use_locking and not self.op.instances:
      # The names are taken from the acquired locks in CheckPrereq
      self.wanted_names = None
    else:
      # The names can be expanded right away
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the node locks if locking is used.

    @param level: Locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE and self.op.use_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Fills in the list of wanted instance names from the owned instance locks
    if it was not given explicitly and loads the instance objects.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      [compat.snd(entry)
       for entry in self.cfg.GetMultiInstanceInfo(self.wanted_names)]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device on one node.

    @param node: Node to query; a false value disables the query
    @param instance_name: Instance name, only used for the error message
    @param dev: Disk object to query
    @return: C{None} for static queries, when no node is given, when the node
      is offline or when no status was returned; otherwise a tuple of device
      path, major, minor, sync percentage, estimated time, degraded flag and
      local disk status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
            bdev_status.sync_percent, bdev_status.estimated_time,
            bdev_status.is_degraded, bdev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute the status of a disk and, recursively, of its children.

    @param instance: Instance owning the disk
    @param snode: Secondary node to query; overridden for DRBD devices
    @param dev: Disk object
    @rtype: dict
    @return: Description of the disk including primary and secondary status

    """
    if dev.dev_type in constants.LDS_DRBD:
      # For DRBD the passed-in secondary is replaced by the entry of the
      # logical ID which is not the instance's primary node
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data.

    @rtype: dict
    @return: Dictionary mapping instance names to dictionaries describing
      the instance

    """
    cluster = self.cfg.GetClusterInfo()
    primary_nodes = self.cfg.GetMultiNodeInfo(inst.primary_node
                                              for inst in self.wanted_instances)

    data = {}

    for (inst, (_, pnode)) in zip(self.wanted_instances, primary_nodes):
      if self.op.static or pnode.offline:
        run_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, inst.name))
      else:
        info_result = self.rpc.call_instance_info(inst.primary_node,
                                                  inst.name,
                                                  inst.hypervisor)
        info_result.Raise("Error checking node %s" % inst.primary_node)
        info = info_result.payload

        if info and "state" in info:
          run_state = "up"
        elif inst.admin_state == constants.ADMINST_UP:
          run_state = "down"
        else:
          run_state = inst.admin_state

      disk_data = [self._ComputeDiskStatus(inst, None, disk)
                   for disk in inst.disks]

      data[inst.name] = {
        "name": inst.name,
        "config_state": inst.admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "os_instance": inst.osparams,
        "os_actual": cluster.SimpleFillOS(inst.os, inst.osparams),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return data
11796
11797
def PrepareContainerMods(mods, private_fn):
  """Extends container modifications with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked once per modification, without
    arguments, to construct its private data field; if C{None}, the private
    data field is C{None}
  @rtype: list
  @return: List of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
11815
11816
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks; going by the names of the checks, either C{None} or a list of
#: two-element items made up of a non-empty string and an arbitrary value
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(ht.TIsLength(2),
          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11824
11825
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies the modifications described by C{mods} to C{container}.

  The container is modified in place. An index of -1 refers to the last
  item, or for additions to the position after the last item.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes; extended in place unless C{None}
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves are added as the item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: If a modification uses an invalid index
  @raise errors.ProgrammerError: If an unknown operation is requested

  """
  for (op, idx, params, private) in mods:
    size = len(container)

    if idx == -1:
      # Last item
      absidx = size - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > size:
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, size))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      append = (idx == -1)

      # Position the new item is going to have
      if append:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if append:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert places the item before the given index
        container.insert(idx, item)
    else:
      # All other operations work on an existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
11913
11914
11915 def _UpdateIvNames(base_index, disks):
11916   """Updates the C{iv_name} attribute of disks.
11917
11918   @type disks: list of L{objects.Disk}
11919
11920   """
11921   for (idx, disk) in enumerate(disks):
11922     disk.iv_name = "disk/%s" % (base_index + idx, )
11923
11924
11925 class _InstNicModPrivate:
11926   """Data structure for network interface modifications.
11927
11928   Used by L{LUInstanceSetParams}.
11929
11930   """
11931   def __init__(self):
11932     self.params = None
11933     self.filled = None
11934
11935
11936 class LUInstanceSetParams(LogicalUnit):
11937   """Modifies an instance's parameters.
11938
11939   """
11940   HPATH = "instance-modify"
11941   HTYPE = constants.HTYPE_INSTANCE
11942   REQ_BGL = False
11943
11944   @staticmethod
11945   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11946     assert ht.TList(mods)
11947     assert not mods or len(mods[0]) in (2, 3)
11948
11949     if mods and len(mods[0]) == 2:
11950       result = []
11951
11952       addremove = 0
11953       for op, params in mods:
11954         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11955           result.append((op, -1, params))
11956           addremove += 1
11957
11958           if addremove > 1:
11959             raise errors.OpPrereqError("Only one %s add or remove operation is"
11960                                        " supported at a time" % kind,
11961                                        errors.ECODE_INVAL)
11962         else:
11963           result.append((constants.DDM_MODIFY, op, params))
11964
11965       assert verify_fn(result)
11966     else:
11967       result = mods
11968
11969     return result
11970
11971   @staticmethod
11972   def _CheckMods(kind, mods, key_types, item_fn):
11973     """Ensures requested disk/NIC modifications are valid.
11974
11975     """
11976     for (op, _, params) in mods:
11977       assert ht.TDict(params)
11978
11979       utils.ForceDictType(params, key_types)
11980
11981       if op == constants.DDM_REMOVE:
11982         if params:
11983           raise errors.OpPrereqError("No settings should be passed when"
11984                                      " removing a %s" % kind,
11985                                      errors.ECODE_INVAL)
11986       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11987         item_fn(op, params)
11988       else:
11989         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11990
11991   @staticmethod
11992   def _VerifyDiskModification(op, params):
11993     """Verifies a disk modification.
11994
11995     """
11996     if op == constants.DDM_ADD:
11997       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11998       if mode not in constants.DISK_ACCESS_SET:
11999         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12000                                    errors.ECODE_INVAL)
12001
12002       size = params.get(constants.IDISK_SIZE, None)
12003       if size is None:
12004         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12005                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12006
12007       try:
12008         size = int(size)
12009       except (TypeError, ValueError), err:
12010         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12011                                    errors.ECODE_INVAL)
12012
12013       params[constants.IDISK_SIZE] = size
12014
12015     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12016       raise errors.OpPrereqError("Disk size change not possible, use"
12017                                  " grow-disk", errors.ECODE_INVAL)
12018
12019   @staticmethod
12020   def _VerifyNicModification(op, params):
12021     """Verifies a network interface modification.
12022
12023     """
12024     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12025       ip = params.get(constants.INIC_IP, None)
12026       if ip is None:
12027         pass
12028       elif ip.lower() == constants.VALUE_NONE:
12029         params[constants.INIC_IP] = None
12030       elif not netutils.IPAddress.IsValid(ip):
12031         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12032                                    errors.ECODE_INVAL)
12033
12034       bridge = params.get("bridge", None)
12035       link = params.get(constants.INIC_LINK, None)
12036       if bridge and link:
12037         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12038                                    " at the same time", errors.ECODE_INVAL)
12039       elif bridge and bridge.lower() == constants.VALUE_NONE:
12040         params["bridge"] = None
12041       elif link and link.lower() == constants.VALUE_NONE:
12042         params[constants.INIC_LINK] = None
12043
12044       if op == constants.DDM_ADD:
12045         macaddr = params.get(constants.INIC_MAC, None)
12046         if macaddr is None:
12047           params[constants.INIC_MAC] = constants.VALUE_AUTO
12048
12049       if constants.INIC_MAC in params:
12050         macaddr = params[constants.INIC_MAC]
12051         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12052           macaddr = utils.NormalizeAndValidateMac(macaddr)
12053
12054         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12055           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12056                                      " modifying an existing NIC",
12057                                      errors.ECODE_INVAL)
12058
12059   def CheckArguments(self):
12060     if not (self.op.nics or self.op.disks or self.op.disk_template or
12061             self.op.hvparams or self.op.beparams or self.op.os_name or
12062             self.op.offline is not None or self.op.runtime_mem):
12063       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12064
12065     if self.op.hvparams:
12066       _CheckGlobalHvParams(self.op.hvparams)
12067
12068     self.op.disks = \
12069       self._UpgradeDiskNicMods("disk", self.op.disks,
12070         opcodes.OpInstanceSetParams.TestDiskModifications)
12071     self.op.nics = \
12072       self._UpgradeDiskNicMods("NIC", self.op.nics,
12073         opcodes.OpInstanceSetParams.TestNicModifications)
12074
12075     # Check disk modifications
12076     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12077                     self._VerifyDiskModification)
12078
12079     if self.op.disks and self.op.disk_template is not None:
12080       raise errors.OpPrereqError("Disk template conversion and other disk"
12081                                  " changes not supported at the same time",
12082                                  errors.ECODE_INVAL)
12083
12084     if (self.op.disk_template and
12085         self.op.disk_template in constants.DTS_INT_MIRROR and
12086         self.op.remote_node is None):
12087       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12088                                  " one requires specifying a secondary node",
12089                                  errors.ECODE_INVAL)
12090
12091     # Check NIC modifications
12092     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12093                     self._VerifyNicModification)
12094
12095   def ExpandNames(self):
12096     self._ExpandAndLockInstance()
12097     # Can't even acquire node locks in shared mode as upcoming changes in
12098     # Ganeti 2.6 will start to modify the node object on disk conversion
12099     self.needed_locks[locking.LEVEL_NODE] = []
12100     self.needed_locks[locking.LEVEL_NODE_RES] = []
12101     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12102
12103   def DeclareLocks(self, level):
12104     # TODO: Acquire group lock in shared mode (disk parameters)
12105     if level == locking.LEVEL_NODE:
12106       self._LockInstancesNodes()
12107       if self.op.disk_template and self.op.remote_node:
12108         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12109         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12110     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12111       # Copy node locks
12112       self.needed_locks[locking.LEVEL_NODE_RES] = \
12113         self.needed_locks[locking.LEVEL_NODE][:]
12114
12115   def BuildHooksEnv(self):
12116     """Build hooks env.
12117
12118     This runs on the master, primary and secondaries.
12119
12120     """
12121     args = dict()
12122     if constants.BE_MINMEM in self.be_new:
12123       args["minmem"] = self.be_new[constants.BE_MINMEM]
12124     if constants.BE_MAXMEM in self.be_new:
12125       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12126     if constants.BE_VCPUS in self.be_new:
12127       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12128     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12129     # information at all.
12130
12131     if self._new_nics is not None:
12132       nics = []
12133
12134       for nic in self._new_nics:
12135         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12136         mode = nicparams[constants.NIC_MODE]
12137         link = nicparams[constants.NIC_LINK]
12138         nics.append((nic.ip, nic.mac, mode, link))
12139
12140       args["nics"] = nics
12141
12142     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12143     if self.op.disk_template:
12144       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12145     if self.op.runtime_mem:
12146       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12147
12148     return env
12149
12150   def BuildHooksNodes(self):
12151     """Build hooks nodes.
12152
12153     """
12154     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12155     return (nl, nl)
12156
12157   def _PrepareNicModification(self, params, private, old_ip, old_params,
12158                               cluster, pnode):
12159     update_params_dict = dict([(key, params[key])
12160                                for key in constants.NICS_PARAMETERS
12161                                if key in params])
12162
12163     if "bridge" in params:
12164       update_params_dict[constants.NIC_LINK] = params["bridge"]
12165
12166     new_params = _GetUpdatedParams(old_params, update_params_dict)
12167     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12168
12169     new_filled_params = cluster.SimpleFillNIC(new_params)
12170     objects.NIC.CheckParameterSyntax(new_filled_params)
12171
12172     new_mode = new_filled_params[constants.NIC_MODE]
12173     if new_mode == constants.NIC_MODE_BRIDGED:
12174       bridge = new_filled_params[constants.NIC_LINK]
12175       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12176       if msg:
12177         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12178         if self.op.force:
12179           self.warn.append(msg)
12180         else:
12181           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12182
12183     elif new_mode == constants.NIC_MODE_ROUTED:
12184       ip = params.get(constants.INIC_IP, old_ip)
12185       if ip is None:
12186         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12187                                    " on a routed NIC", errors.ECODE_INVAL)
12188
12189     if constants.INIC_MAC in params:
12190       mac = params[constants.INIC_MAC]
12191       if mac is None:
12192         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12193                                    errors.ECODE_INVAL)
12194       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12195         # otherwise generate the MAC address
12196         params[constants.INIC_MAC] = \
12197           self.cfg.GenerateMAC(self.proc.GetECId())
12198       else:
12199         # or validate/reserve the current one
12200         try:
12201           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12202         except errors.ReservationError:
12203           raise errors.OpPrereqError("MAC address '%s' already in use"
12204                                      " in cluster" % mac,
12205                                      errors.ECODE_NOTUNIQUE)
12206
12207     private.params = new_params
12208     private.filled = new_filled_params
12209
12210     return (None, None)
12211
12212   def CheckPrereq(self):
12213     """Check prerequisites.
12214
12215     This only checks the instance list against the existing names.
12216
12217     """
12218     # checking the new params on the primary/secondary nodes
12219
12220     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12221     cluster = self.cluster = self.cfg.GetClusterInfo()
12222     assert self.instance is not None, \
12223       "Cannot retrieve locked instance %s" % self.op.instance_name
12224     pnode = instance.primary_node
12225     nodelist = list(instance.all_nodes)
12226     pnode_info = self.cfg.GetNodeInfo(pnode)
12227     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12228
12229     # Prepare disk/NIC modifications
12230     self.diskmod = PrepareContainerMods(self.op.disks, None)
12231     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12232
12233     # OS change
12234     if self.op.os_name and not self.op.force:
12235       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12236                       self.op.force_variant)
12237       instance_os = self.op.os_name
12238     else:
12239       instance_os = instance.os
12240
12241     assert not (self.op.disk_template and self.op.disks), \
12242       "Can't modify disk template and apply disk changes at the same time"
12243
12244     if self.op.disk_template:
12245       if instance.disk_template == self.op.disk_template:
12246         raise errors.OpPrereqError("Instance already has disk template %s" %
12247                                    instance.disk_template, errors.ECODE_INVAL)
12248
12249       if (instance.disk_template,
12250           self.op.disk_template) not in self._DISK_CONVERSIONS:
12251         raise errors.OpPrereqError("Unsupported disk template conversion from"
12252                                    " %s to %s" % (instance.disk_template,
12253                                                   self.op.disk_template),
12254                                    errors.ECODE_INVAL)
12255       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12256                           msg="cannot change disk template")
12257       if self.op.disk_template in constants.DTS_INT_MIRROR:
12258         if self.op.remote_node == pnode:
12259           raise errors.OpPrereqError("Given new secondary node %s is the same"
12260                                      " as the primary node of the instance" %
12261                                      self.op.remote_node, errors.ECODE_STATE)
12262         _CheckNodeOnline(self, self.op.remote_node)
12263         _CheckNodeNotDrained(self, self.op.remote_node)
12264         # FIXME: here we assume that the old instance type is DT_PLAIN
12265         assert instance.disk_template == constants.DT_PLAIN
12266         disks = [{constants.IDISK_SIZE: d.size,
12267                   constants.IDISK_VG: d.logical_id[0]}
12268                  for d in instance.disks]
12269         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12270         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12271
12272         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12273         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12274         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12275         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12276                                 ignore=self.op.ignore_ipolicy)
12277         if pnode_info.group != snode_info.group:
12278           self.LogWarning("The primary and secondary nodes are in two"
12279                           " different node groups; the disk parameters"
12280                           " from the first disk's node group will be"
12281                           " used")
12282
12283     # hvparams processing
12284     if self.op.hvparams:
12285       hv_type = instance.hypervisor
12286       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12287       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12288       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12289
12290       # local check
12291       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12292       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12293       self.hv_proposed = self.hv_new = hv_new # the new actual values
12294       self.hv_inst = i_hvdict # the new dict (without defaults)
12295     else:
12296       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12297                                               instance.hvparams)
12298       self.hv_new = self.hv_inst = {}
12299
12300     # beparams processing
12301     if self.op.beparams:
12302       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12303                                    use_none=True)
12304       objects.UpgradeBeParams(i_bedict)
12305       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12306       be_new = cluster.SimpleFillBE(i_bedict)
12307       self.be_proposed = self.be_new = be_new # the new actual values
12308       self.be_inst = i_bedict # the new dict (without defaults)
12309     else:
12310       self.be_new = self.be_inst = {}
12311       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12312     be_old = cluster.FillBE(instance)
12313
12314     # CPU param validation -- checking every time a paramtere is
12315     # changed to cover all cases where either CPU mask or vcpus have
12316     # changed
12317     if (constants.BE_VCPUS in self.be_proposed and
12318         constants.HV_CPU_MASK in self.hv_proposed):
12319       cpu_list = \
12320         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12321       # Verify mask is consistent with number of vCPUs. Can skip this
12322       # test if only 1 entry in the CPU mask, which means same mask
12323       # is applied to all vCPUs.
12324       if (len(cpu_list) > 1 and
12325           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12326         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12327                                    " CPU mask [%s]" %
12328                                    (self.be_proposed[constants.BE_VCPUS],
12329                                     self.hv_proposed[constants.HV_CPU_MASK]),
12330                                    errors.ECODE_INVAL)
12331
12332       # Only perform this test if a new CPU mask is given
12333       if constants.HV_CPU_MASK in self.hv_new:
12334         # Calculate the largest CPU number requested
12335         max_requested_cpu = max(map(max, cpu_list))
12336         # Check that all of the instance's nodes have enough physical CPUs to
12337         # satisfy the requested CPU mask
12338         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12339                                 max_requested_cpu + 1, instance.hypervisor)
12340
12341     # osparams processing
12342     if self.op.osparams:
12343       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12344       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12345       self.os_inst = i_osdict # the new dict (without defaults)
12346     else:
12347       self.os_inst = {}
12348
12349     self.warn = []
12350
12351     #TODO(dynmem): do the appropriate check involving MINMEM
12352     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12353         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12354       mem_check_list = [pnode]
12355       if be_new[constants.BE_AUTO_BALANCE]:
12356         # either we changed auto_balance to yes or it was from before
12357         mem_check_list.extend(instance.secondary_nodes)
12358       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12359                                                   instance.hypervisor)
12360       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12361                                          [instance.hypervisor])
12362       pninfo = nodeinfo[pnode]
12363       msg = pninfo.fail_msg
12364       if msg:
12365         # Assume the primary node is unreachable and go ahead
12366         self.warn.append("Can't get info from primary node %s: %s" %
12367                          (pnode, msg))
12368       else:
12369         (_, _, (pnhvinfo, )) = pninfo.payload
12370         if not isinstance(pnhvinfo.get("memory_free", None), int):
12371           self.warn.append("Node data from primary node %s doesn't contain"
12372                            " free memory information" % pnode)
12373         elif instance_info.fail_msg:
12374           self.warn.append("Can't get instance runtime information: %s" %
12375                           instance_info.fail_msg)
12376         else:
12377           if instance_info.payload:
12378             current_mem = int(instance_info.payload["memory"])
12379           else:
12380             # Assume instance not running
12381             # (there is a slight race condition here, but it's not very
12382             # probable, and we have no other way to check)
12383             # TODO: Describe race condition
12384             current_mem = 0
12385           #TODO(dynmem): do the appropriate check involving MINMEM
12386           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12387                       pnhvinfo["memory_free"])
12388           if miss_mem > 0:
12389             raise errors.OpPrereqError("This change will prevent the instance"
12390                                        " from starting, due to %d MB of memory"
12391                                        " missing on its primary node" %
12392                                        miss_mem,
12393                                        errors.ECODE_NORES)
12394
12395       if be_new[constants.BE_AUTO_BALANCE]:
12396         for node, nres in nodeinfo.items():
12397           if node not in instance.secondary_nodes:
12398             continue
12399           nres.Raise("Can't get info from secondary node %s" % node,
12400                      prereq=True, ecode=errors.ECODE_STATE)
12401           (_, _, (nhvinfo, )) = nres.payload
12402           if not isinstance(nhvinfo.get("memory_free", None), int):
12403             raise errors.OpPrereqError("Secondary node %s didn't return free"
12404                                        " memory information" % node,
12405                                        errors.ECODE_STATE)
12406           #TODO(dynmem): do the appropriate check involving MINMEM
12407           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12408             raise errors.OpPrereqError("This change will prevent the instance"
12409                                        " from failover to its secondary node"
12410                                        " %s, due to not enough memory" % node,
12411                                        errors.ECODE_STATE)
12412
12413     if self.op.runtime_mem:
12414       remote_info = self.rpc.call_instance_info(instance.primary_node,
12415                                                 instance.name,
12416                                                 instance.hypervisor)
12417       remote_info.Raise("Error checking node %s" % instance.primary_node)
12418       if not remote_info.payload: # not running already
12419         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12420                                    errors.ECODE_STATE)
12421
12422       current_memory = remote_info.payload["memory"]
12423       if (not self.op.force and
12424            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12425             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12426         raise errors.OpPrereqError("Instance %s must have memory between %d"
12427                                    " and %d MB of memory unless --force is"
12428                                    " given" % (instance.name,
12429                                     self.be_proposed[constants.BE_MINMEM],
12430                                     self.be_proposed[constants.BE_MAXMEM]),
12431                                    errors.ECODE_INVAL)
12432
12433       if self.op.runtime_mem > current_memory:
12434         _CheckNodeFreeMemory(self, instance.primary_node,
12435                              "ballooning memory for instance %s" %
12436                              instance.name,
12437                              self.op.memory - current_memory,
12438                              instance.hypervisor)
12439
12440     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12441       raise errors.OpPrereqError("Disk operations not supported for"
12442                                  " diskless instances",
12443                                  errors.ECODE_INVAL)
12444
12445     def _PrepareNicCreate(_, params, private):
12446       return self._PrepareNicModification(params, private, None, {},
12447                                           cluster, pnode)
12448
12449     def _PrepareNicMod(_, nic, params, private):
12450       return self._PrepareNicModification(params, private, nic.ip,
12451                                           nic.nicparams, cluster, pnode)
12452
12453     # Verify NIC changes (operating on copy)
12454     nics = instance.nics[:]
12455     ApplyContainerMods("NIC", nics, None, self.nicmod,
12456                        _PrepareNicCreate, _PrepareNicMod, None)
12457     if len(nics) > constants.MAX_NICS:
12458       raise errors.OpPrereqError("Instance has too many network interfaces"
12459                                  " (%d), cannot add more" % constants.MAX_NICS,
12460                                  errors.ECODE_STATE)
12461
12462     # Verify disk changes (operating on a copy)
12463     disks = instance.disks[:]
12464     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12465     if len(disks) > constants.MAX_DISKS:
12466       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12467                                  " more" % constants.MAX_DISKS,
12468                                  errors.ECODE_STATE)
12469
12470     if self.op.offline is not None:
12471       if self.op.offline:
12472         msg = "can't change to offline"
12473       else:
12474         msg = "can't change to online"
12475       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12476
12477     # Pre-compute NIC changes (necessary to use result in hooks)
12478     self._nic_chgdesc = []
12479     if self.nicmod:
12480       # Operate on copies as this is still in prereq
12481       nics = [nic.Copy() for nic in instance.nics]
12482       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12483                          self._CreateNewNic, self._ApplyNicMods, None)
12484       self._new_nics = nics
12485     else:
12486       self._new_nics = None
12487
12488   def _ConvertPlainToDrbd(self, feedback_fn):
12489     """Converts an instance from plain to drbd.
12490
12491     """
12492     feedback_fn("Converting template to drbd")
12493     instance = self.instance
12494     pnode = instance.primary_node
12495     snode = self.op.remote_node
12496
12497     assert instance.disk_template == constants.DT_PLAIN
12498
12499     # create a fake disk info for _GenerateDiskTemplate
12500     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12501                   constants.IDISK_VG: d.logical_id[0]}
12502                  for d in instance.disks]
12503     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12504                                       instance.name, pnode, [snode],
12505                                       disk_info, None, None, 0, feedback_fn,
12506                                       self.diskparams)
12507     info = _GetInstanceInfoText(instance)
12508     feedback_fn("Creating aditional volumes...")
12509     # first, create the missing data and meta devices
12510     for disk in new_disks:
12511       # unfortunately this is... not too nice
12512       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12513                             info, True)
12514       for child in disk.children:
12515         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12516     # at this stage, all new LVs have been created, we can rename the
12517     # old ones
12518     feedback_fn("Renaming original volumes...")
12519     rename_list = [(o, n.children[0].logical_id)
12520                    for (o, n) in zip(instance.disks, new_disks)]
12521     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12522     result.Raise("Failed to rename original LVs")
12523
12524     feedback_fn("Initializing DRBD devices...")
12525     # all child devices are in place, we can now create the DRBD devices
12526     for disk in new_disks:
12527       for node in [pnode, snode]:
12528         f_create = node == pnode
12529         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12530
12531     # at this point, the instance has been modified
12532     instance.disk_template = constants.DT_DRBD8
12533     instance.disks = new_disks
12534     self.cfg.Update(instance, feedback_fn)
12535
12536     # Release node locks while waiting for sync
12537     _ReleaseLocks(self, locking.LEVEL_NODE)
12538
12539     # disks are created, waiting for sync
12540     disk_abort = not _WaitForSync(self, instance,
12541                                   oneshot=not self.op.wait_for_sync)
12542     if disk_abort:
12543       raise errors.OpExecError("There are some degraded disks for"
12544                                " this instance, please cleanup manually")
12545
12546     # Node resource locks will be released by caller
12547
12548   def _ConvertDrbdToPlain(self, feedback_fn):
12549     """Converts an instance from drbd to plain.
12550
12551     """
12552     instance = self.instance
12553
12554     assert len(instance.secondary_nodes) == 1
12555     assert instance.disk_template == constants.DT_DRBD8
12556
12557     pnode = instance.primary_node
12558     snode = instance.secondary_nodes[0]
12559     feedback_fn("Converting template to plain")
12560
12561     old_disks = instance.disks
12562     new_disks = [d.children[0] for d in old_disks]
12563
12564     # copy over size and mode
12565     for parent, child in zip(old_disks, new_disks):
12566       child.size = parent.size
12567       child.mode = parent.mode
12568
12569     # update instance structure
12570     instance.disks = new_disks
12571     instance.disk_template = constants.DT_PLAIN
12572     self.cfg.Update(instance, feedback_fn)
12573
12574     # Release locks in case removing disks takes a while
12575     _ReleaseLocks(self, locking.LEVEL_NODE)
12576
12577     feedback_fn("Removing volumes on the secondary node...")
12578     for disk in old_disks:
12579       self.cfg.SetDiskID(disk, snode)
12580       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12581       if msg:
12582         self.LogWarning("Could not remove block device %s on node %s,"
12583                         " continuing anyway: %s", disk.iv_name, snode, msg)
12584
12585     feedback_fn("Removing unneeded volumes on the primary node...")
12586     for idx, disk in enumerate(old_disks):
12587       meta = disk.children[1]
12588       self.cfg.SetDiskID(meta, pnode)
12589       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12590       if msg:
12591         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12592                         " continuing anyway: %s", idx, pnode, msg)
12593
12594     # this is a DRBD disk, return its port to the pool
12595     for disk in old_disks:
12596       tcp_port = disk.logical_id[2]
12597       self.cfg.AddTcpUdpPort(tcp_port)
12598
12599     # Node resource locks will be released by caller
12600
12601   def _CreateNewDisk(self, idx, params, _):
12602     """Creates a new disk.
12603
12604     """
12605     instance = self.instance
12606
12607     # add a new disk
12608     if instance.disk_template in constants.DTS_FILEBASED:
12609       (file_driver, file_path) = instance.disks[0].logical_id
12610       file_path = os.path.dirname(file_path)
12611     else:
12612       file_driver = file_path = None
12613
12614     disk = \
12615       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12616                             instance.primary_node, instance.secondary_nodes,
12617                             [params], file_path, file_driver, idx,
12618                             self.Log, self.diskparams)[0]
12619
12620     info = _GetInstanceInfoText(instance)
12621
12622     logging.info("Creating volume %s for instance %s",
12623                  disk.iv_name, instance.name)
12624     # Note: this needs to be kept in sync with _CreateDisks
12625     #HARDCODE
12626     for node in instance.all_nodes:
12627       f_create = (node == instance.primary_node)
12628       try:
12629         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12630       except errors.OpExecError, err:
12631         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12632                         disk.iv_name, disk, node, err)
12633
12634     return (disk, [
12635       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12636       ])
12637
12638   @staticmethod
12639   def _ModifyDisk(idx, disk, params, _):
12640     """Modifies a disk.
12641
12642     """
12643     disk.mode = params[constants.IDISK_MODE]
12644
12645     return [
12646       ("disk.mode/%d" % idx, disk.mode),
12647       ]
12648
12649   def _RemoveDisk(self, idx, root, _):
12650     """Removes a disk.
12651
12652     """
12653     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12654       self.cfg.SetDiskID(disk, node)
12655       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12656       if msg:
12657         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12658                         " continuing anyway", idx, node, msg)
12659
12660     # if this is a DRBD disk, return its port to the pool
12661     if root.dev_type in constants.LDS_DRBD:
12662       self.cfg.AddTcpUdpPort(root.logical_id[2])
12663
12664   @staticmethod
12665   def _CreateNewNic(idx, params, private):
12666     """Creates data structure for a new network interface.
12667
12668     """
12669     mac = params[constants.INIC_MAC]
12670     ip = params.get(constants.INIC_IP, None)
12671     nicparams = private.params
12672
12673     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12674       ("nic.%d" % idx,
12675        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12676        (mac, ip, private.filled[constants.NIC_MODE],
12677        private.filled[constants.NIC_LINK])),
12678       ])
12679
12680   @staticmethod
12681   def _ApplyNicMods(idx, nic, params, private):
12682     """Modifies a network interface.
12683
12684     """
12685     changes = []
12686
12687     for key in [constants.INIC_MAC, constants.INIC_IP]:
12688       if key in params:
12689         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12690         setattr(nic, key, params[key])
12691
12692     if private.params:
12693       nic.nicparams = private.params
12694
12695       for (key, val) in params.items():
12696         changes.append(("nic.%s/%d" % (key, idx), val))
12697
12698     return changes
12699
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance
    (the runtime memory change, however, is requested from the primary
    node right away through an RPC call).

    The steps are executed in this order: runtime memory, disk
    modifications, disk template conversion, release of node locks, NIC,
    hypervisor, backend, OS and offline-state changes, and finally the
    configuration update.

    @param feedback_fn: callable receiving progress messages (strings)
    @rtype: list
    @return: (name, value) pairs describing the changes made
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be held exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        # Sanity check: locks for all involved nodes (including the new
        # secondary, if any) must be held at both node lock levels
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # The conversion routine is selected by the (current, requested)
      # template pair
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Whatever went wrong, release the DRBD minors recorded for this
        # instance before propagating the error
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Write all the attribute changes made above to the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12810
  # Dispatch table for disk template conversions, keyed by the pair
  # (current disk template, requested disk template). The values are the
  # plain functions defined in this class body, which is why Exec calls
  # them with "self" passed explicitly.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12815
12816
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for changing the node group of an instance.

  The LU itself does not move anything: it asks an iallocator (in
  C{IALLOCATOR_MODE_CHG_GROUP} mode) for a solution and returns the
  resulting jobs wrapped in L{ResultWithJobs}.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the lock levels used and resolves the target groups.

    The requested target group names (if any) are resolved through
    C{self.cfg.LookupNodeGroup} and stored in C{self.req_target_uuids};
    the node group and node locks themselves are computed later in
    L{DeclareLocks}.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks needed.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Verifies the acquired locks and determines the target groups.

    Sets C{self.instance} and C{self.target_uuids}.

    @raise errors.OpPrereqError: if a target group is already used by
        the instance, or if no target group is left

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    # A group the instance already uses cannot be a target
    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the instance hook environment plus C{TARGET_GROUPS}, the
        space-separated target group UUIDs

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Runs the iallocator and returns the jobs it proposes.

    @param feedback_fn: feedback function (not used by this method)
    @rtype: L{ResultWithJobs}
    @return: the jobs computed from the iallocator result
    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    # Turn the iallocator result into jobs
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
12956
12957
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests shared node locks, either for the given nodes or for all.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      # No explicit node list given, query every node
      wanted_nodes = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose RPC call failed map to C{False}
        instead.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    answers = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in answers:
      node_answer = answers[node]
      if node_answer.fail_msg:
        exports[node] = False
        continue
      exports[node] = node_answer.payload

    return exports
12992
12993
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance named in the opcode.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, requires its primary node to be online and
    fetches the cluster domain secret.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst

    assert inst is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: callable receiving progress messages (strings)
    @return: C{None} unless the export mode is remote; for remote
        exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # Nothing needs to be prepared for the other modes
      return None

    pnode = self.instance.primary_node
    secret = self._cds
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (key_name, cert_pem) = result.payload
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # The values are computed in the same order as the dictionary lists them
    handshake = masterd.instance.ComputeRemoteExportHandshake(secret)
    signed_key_name = (key_name,
                       utils.Sha1Hmac(secret, key_name, salt=salt),
                       salt)
    signed_ca = utils.SignX509Certificate(cert, secret, salt)

    return {
      "handshake": handshake,
      "x509_key_name": signed_key_name,
      "x509_ca": signed_ca,
      }
13043
13044
13045 class LUBackupExport(LogicalUnit):
13046   """Export an instance to an image in the cluster.
13047
13048   """
13049   HPATH = "instance-export"
13050   HTYPE = constants.HTYPE_INSTANCE
13051   REQ_BGL = False
13052
13053   def CheckArguments(self):
13054     """Check the arguments.
13055
13056     """
13057     self.x509_key_name = self.op.x509_key_name
13058     self.dest_x509_ca_pem = self.op.destination_x509_ca
13059
13060     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13061       if not self.x509_key_name:
13062         raise errors.OpPrereqError("Missing X509 key name for encryption",
13063                                    errors.ECODE_INVAL)
13064
13065       if not self.dest_x509_ca_pem:
13066         raise errors.OpPrereqError("Missing destination X509 CA",
13067                                    errors.ECODE_INVAL)
13068
13069   def ExpandNames(self):
13070     self._ExpandAndLockInstance()
13071
13072     # Lock all nodes for local exports
13073     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13074       # FIXME: lock only instance primary and destination node
13075       #
13076       # Sad but true, for now we have do lock all nodes, as we don't know where
13077       # the previous export might be, and in this LU we search for it and
13078       # remove it from its current node. In the future we could fix this by:
13079       #  - making a tasklet to search (share-lock all), then create the
13080       #    new one, then one to remove, after
13081       #  - removing the removal operation altogether
13082       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13083
13084   def DeclareLocks(self, level):
13085     """Last minute lock declaration."""
13086     # All nodes are locked anyway, so nothing to do here.
13087
13088   def BuildHooksEnv(self):
13089     """Build hooks env.
13090
13091     This will run on the master, primary node and target node.
13092
13093     """
13094     env = {
13095       "EXPORT_MODE": self.op.mode,
13096       "EXPORT_NODE": self.op.target_node,
13097       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13098       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13099       # TODO: Generic function for boolean env variables
13100       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13101       }
13102
13103     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13104
13105     return env
13106
13107   def BuildHooksNodes(self):
13108     """Build hooks nodes.
13109
13110     """
13111     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13112
13113     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13114       nl.append(self.op.target_node)
13115
13116     return (nl, nl)
13117
13118   def CheckPrereq(self):
13119     """Check prerequisites.
13120
13121     This checks that the instance and node names are valid.
13122
13123     """
13124     instance_name = self.op.instance_name
13125
13126     self.instance = self.cfg.GetInstanceInfo(instance_name)
13127     assert self.instance is not None, \
13128           "Cannot retrieve locked instance %s" % self.op.instance_name
13129     _CheckNodeOnline(self, self.instance.primary_node)
13130
13131     if (self.op.remove_instance and
13132         self.instance.admin_state == constants.ADMINST_UP and
13133         not self.op.shutdown):
13134       raise errors.OpPrereqError("Can not remove instance without shutting it"
13135                                  " down before")
13136
13137     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13138       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13139       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13140       assert self.dst_node is not None
13141
13142       _CheckNodeOnline(self, self.dst_node.name)
13143       _CheckNodeNotDrained(self, self.dst_node.name)
13144
13145       self._cds = None
13146       self.dest_disk_info = None
13147       self.dest_x509_ca = None
13148
13149     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13150       self.dst_node = None
13151
13152       if len(self.op.target_node) != len(self.instance.disks):
13153         raise errors.OpPrereqError(("Received destination information for %s"
13154                                     " disks, but instance %s has %s disks") %
13155                                    (len(self.op.target_node), instance_name,
13156                                     len(self.instance.disks)),
13157                                    errors.ECODE_INVAL)
13158
13159       cds = _GetClusterDomainSecret()
13160
13161       # Check X509 key name
13162       try:
13163         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13164       except (TypeError, ValueError), err:
13165         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13166
13167       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13168         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13169                                    errors.ECODE_INVAL)
13170
13171       # Load and verify CA
13172       try:
13173         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13174       except OpenSSL.crypto.Error, err:
13175         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13176                                    (err, ), errors.ECODE_INVAL)
13177
13178       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13179       if errcode is not None:
13180         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13181                                    (msg, ), errors.ECODE_INVAL)
13182
13183       self.dest_x509_ca = cert
13184
13185       # Verify target information
13186       disk_info = []
13187       for idx, disk_data in enumerate(self.op.target_node):
13188         try:
13189           (host, port, magic) = \
13190             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13191         except errors.GenericError, err:
13192           raise errors.OpPrereqError("Target info for disk %s: %s" %
13193                                      (idx, err), errors.ECODE_INVAL)
13194
13195         disk_info.append((host, port, magic))
13196
13197       assert len(disk_info) == len(self.op.target_node)
13198       self.dest_disk_info = disk_info
13199
13200     else:
13201       raise errors.ProgrammerError("Unhandled export mode %r" %
13202                                    self.op.mode)
13203
13204     # instance disk type verification
13205     # TODO: Implement export support for file-based disks
13206     for disk in self.instance.disks:
13207       if disk.dev_type == constants.LD_FILE:
13208         raise errors.OpPrereqError("Export not supported for instances with"
13209                                    " file-based disks", errors.ECODE_INVAL)
13210
13211   def _CleanupExports(self, feedback_fn):
13212     """Removes exports of current instance from all other nodes.
13213
13214     If an instance in a cluster with nodes A..D was exported to node C, its
13215     exports will be removed from the nodes A, B and D.
13216
13217     """
13218     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13219
13220     nodelist = self.cfg.GetNodeList()
13221     nodelist.remove(self.dst_node.name)
13222
13223     # on one-node clusters nodelist will be empty after the removal
13224     # if we proceed the backup would be removed because OpBackupQuery
13225     # substitutes an empty list with the full cluster node list.
13226     iname = self.instance.name
13227     if nodelist:
13228       feedback_fn("Removing old exports for instance %s" % iname)
13229       exportlist = self.rpc.call_export_list(nodelist)
13230       for node in exportlist:
13231         if exportlist[node].fail_msg:
13232           continue
13233         if iname in exportlist[node].payload:
13234           msg = self.rpc.call_export_remove(node, iname).fail_msg
13235           if msg:
13236             self.LogWarning("Could not remove older export for instance %s"
13237                             " on node %s: %s", iname, node, msg)
13238
13239   def Exec(self, feedback_fn):
13240     """Export an instance to an image in the cluster.
13241
13242     """
13243     assert self.op.mode in constants.EXPORT_MODES
13244
13245     instance = self.instance
13246     src_node = instance.primary_node
13247
13248     if self.op.shutdown:
13249       # shutdown the instance, but not the disks
13250       feedback_fn("Shutting down instance %s" % instance.name)
13251       result = self.rpc.call_instance_shutdown(src_node, instance,
13252                                                self.op.shutdown_timeout)
13253       # TODO: Maybe ignore failures if ignore_remove_failures is set
13254       result.Raise("Could not shutdown instance %s on"
13255                    " node %s" % (instance.name, src_node))
13256
13257     # set the disks ID correctly since call_instance_start needs the
13258     # correct drbd minor to create the symlinks
13259     for disk in instance.disks:
13260       self.cfg.SetDiskID(disk, src_node)
13261
13262     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13263
13264     if activate_disks:
13265       # Activate the instance disks if we'exporting a stopped instance
13266       feedback_fn("Activating disks for %s" % instance.name)
13267       _StartInstanceDisks(self, instance, None)
13268
13269     try:
13270       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13271                                                      instance)
13272
13273       helper.CreateSnapshots()
13274       try:
13275         if (self.op.shutdown and
13276             instance.admin_state == constants.ADMINST_UP and
13277             not self.op.remove_instance):
13278           assert not activate_disks
13279           feedback_fn("Starting instance %s" % instance.name)
13280           result = self.rpc.call_instance_start(src_node,
13281                                                 (instance, None, None), False)
13282           msg = result.fail_msg
13283           if msg:
13284             feedback_fn("Failed to start instance: %s" % msg)
13285             _ShutdownInstanceDisks(self, instance)
13286             raise errors.OpExecError("Could not start instance: %s" % msg)
13287
13288         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13289           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13290         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13291           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13292           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13293
13294           (key_name, _, _) = self.x509_key_name
13295
13296           dest_ca_pem = \
13297             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13298                                             self.dest_x509_ca)
13299
13300           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13301                                                      key_name, dest_ca_pem,
13302                                                      timeouts)
13303       finally:
13304         helper.Cleanup()
13305
13306       # Check for backwards compatibility
13307       assert len(dresults) == len(instance.disks)
13308       assert compat.all(isinstance(i, bool) for i in dresults), \
13309              "Not all results are boolean: %r" % dresults
13310
13311     finally:
13312       if activate_disks:
13313         feedback_fn("Deactivating disks for %s" % instance.name)
13314         _ShutdownInstanceDisks(self, instance)
13315
13316     if not (compat.all(dresults) and fin_resu):
13317       failures = []
13318       if not fin_resu:
13319         failures.append("export finalization")
13320       if not compat.all(dresults):
13321         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13322                                if not dsk)
13323         failures.append("disk export: disk(s) %s" % fdsk)
13324
13325       raise errors.OpExecError("Export failed, errors in %s" %
13326                                utils.CommaJoin(failures))
13327
13328     # At this point, the export was successful, we can cleanup/finish
13329
13330     # Remove instance if requested
13331     if self.op.remove_instance:
13332       feedback_fn("Removing instance %s" % instance.name)
13333       _RemoveInstance(self, feedback_fn, instance,
13334                       self.op.ignore_remove_failures)
13335
13336     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13337       self._CleanupExports(feedback_fn)
13338
13339     return fin_resu, dresults
13340
13341
class LUBackupRemove(NoHooksLU):
  """Deletes the exports of a given instance from all nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all nodes, but not the instance.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Nodes which cannot be queried are skipped with a warning and failed
    removals are logged; neither aborts the operation.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    unknown_instance = not expanded_name
    if unknown_instance:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    node_exports = self.rpc.call_export_list(node_names)

    export_seen = False
    for node_name in node_exports:
      node_result = node_exports[node_name]

      query_err = node_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node_name, query_err)
        continue

      if instance_name not in node_result.payload:
        continue

      export_seen = True
      removal = self.rpc.call_export_remove(node_name, instance_name)
      remove_err = removal.fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node_name, remove_err)

    if unknown_instance and not export_seen:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13387
13388
13389 class LUGroupAdd(LogicalUnit):
13390   """Logical unit for creating node groups.
13391
13392   """
13393   HPATH = "group-add"
13394   HTYPE = constants.HTYPE_GROUP
13395   REQ_BGL = False
13396
13397   def ExpandNames(self):
13398     # We need the new group's UUID here so that we can create and acquire the
13399     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13400     # that it should not check whether the UUID exists in the configuration.
13401     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13402     self.needed_locks = {}
13403     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13404
13405   def CheckPrereq(self):
13406     """Check prerequisites.
13407
13408     This checks that the given group name is not an existing node group
13409     already.
13410
13411     """
13412     try:
13413       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13414     except errors.OpPrereqError:
13415       pass
13416     else:
13417       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13418                                  " node group (UUID: %s)" %
13419                                  (self.op.group_name, existing_uuid),
13420                                  errors.ECODE_EXISTS)
13421
13422     if self.op.ndparams:
13423       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13424
13425     if self.op.hv_state:
13426       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13427     else:
13428       self.new_hv_state = None
13429
13430     if self.op.disk_state:
13431       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13432     else:
13433       self.new_disk_state = None
13434
13435     if self.op.diskparams:
13436       for templ in constants.DISK_TEMPLATES:
13437         if templ not in self.op.diskparams:
13438           self.op.diskparams[templ] = {}
13439         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13440     else:
13441       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13442
13443     if self.op.ipolicy:
13444       cluster = self.cfg.GetClusterInfo()
13445       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13446       try:
13447         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13448       except errors.ConfigurationError, err:
13449         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13450                                    errors.ECODE_INVAL)
13451
13452   def BuildHooksEnv(self):
13453     """Build hooks env.
13454
13455     """
13456     return {
13457       "GROUP_NAME": self.op.group_name,
13458       }
13459
13460   def BuildHooksNodes(self):
13461     """Build hooks nodes.
13462
13463     """
13464     mn = self.cfg.GetMasterNode()
13465     return ([mn], [mn])
13466
13467   def Exec(self, feedback_fn):
13468     """Add the node group to the cluster.
13469
13470     """
13471     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13472                                   uuid=self.group_uuid,
13473                                   alloc_policy=self.op.alloc_policy,
13474                                   ndparams=self.op.ndparams,
13475                                   diskparams=self.op.diskparams,
13476                                   ipolicy=self.op.ipolicy,
13477                                   hv_state_static=self.new_hv_state,
13478                                   disk_state_static=self.new_disk_state)
13479
13480     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13481     del self.remove_locks[locking.LEVEL_NODEGROUP]
13482
13483
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit which moves nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group and node names and declare the initial locks.

    """
    # These raise errors.OpPrereqError on their own:
    target_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = target_uuid
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Add the current groups of the nodes to the group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Try to get all affected nodes' groups without having the group or node
    # lock yet. Needs verification later in the code flow.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

    group_locks.update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the group locks acquired earlier still match the groups
    of the nodes and that the assignment does not split instances, unless
    the operation is forced.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    node_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
    current_groups = set([self.group_uuid]) | node_groups
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    all_instances = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            all_instances)

    if not new_splits:
      # no instance gets newly split, nothing to complain about
      return

    newly_split_txt = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               newly_split_txt)

    self.LogWarning("This operation will split the following instances: %s",
                    newly_split_txt)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    assignments = []
    for node_name in self.op.nodes:
      assignments.append((node_name, self.group_uuid))

    self.cfg.AssignGroupNodes(assignments)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The given node assignments are treated as one atomic operation. An
    instance counts as split if its primary and secondary nodes belong to
    more than one group; this is evaluated both for the current state and
    for the state after applying all changes.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # only keep the assignments which actually move a node
    moved_nodes = {}
    for (node, new_group) in changes:
      if node_data[node].group != new_group:
        moved_nodes[node] = new_group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set()
      groups_after = set()
      for node in inst_nodes:
        cur_group = node_data[node].group
        groups_before.add(cur_group)
        groups_after.add(moved_nodes.get(node, cur_group))

      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
13615
13616
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are declared.

    @raise errors.OpPrereqError: if a requested group is neither a known
        UUID nor a known name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    uuid_by_name = {}
    for group in self._all_groups.values():
      uuid_by_name[group.name] = group.uuid

    if not self.names:
      # no names given: all groups, sorted by name
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]
      return

    # Accept names to be either names or UUIDs.
    unknown = []
    wanted = self.wanted = []
    known_uuids = frozenset(self._all_groups.keys())

    for name in self.names:
      if name in known_uuids:
        wanted.append(name)
        continue

      if name in uuid_by_name:
        wanted.append(uuid_by_name[name])
        continue

      unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Declare locks; nothing is locked for group queries.

    """

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    The group to nodes and group to instances mappings are only computed
    if the respective data was requested.

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if want_nodes or want_instances:
      node_infos = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node_info in node_infos.values():
        if node_info.group not in nodes_by_group:
          # node belongs to a group which was not asked for
          continue
        nodes_by_group[node_info.group].append(node_info.name)
        group_of_node[node_info.name] = node_info.group

      if want_instances:
        instance_infos = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for inst_info in instance_infos.values():
          pnode = inst_info.primary_node
          if pnode not in group_of_node:
            continue
          instances_by_group[group_of_node[pnode]].append(inst_info.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group)
13694
13695
class LUGroupQuery(NoHooksLU):
  """Logical unit which answers node group queries.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_obj = self.gq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query_obj = self.gq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_obj = self.gq
    return query_obj.OldStyleQuery(self)
13714
13715
13716 class LUGroupSetParams(LogicalUnit):
13717   """Modifies the parameters of a node group.
13718
13719   """
13720   HPATH = "group-modify"
13721   HTYPE = constants.HTYPE_GROUP
13722   REQ_BGL = False
13723
13724   def CheckArguments(self):
13725     all_changes = [
13726       self.op.ndparams,
13727       self.op.diskparams,
13728       self.op.alloc_policy,
13729       self.op.hv_state,
13730       self.op.disk_state,
13731       self.op.ipolicy,
13732       ]
13733
13734     if all_changes.count(None) == len(all_changes):
13735       raise errors.OpPrereqError("Please pass at least one modification",
13736                                  errors.ECODE_INVAL)
13737
13738   def ExpandNames(self):
13739     # This raises errors.OpPrereqError on its own:
13740     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13741
13742     self.needed_locks = {
13743       locking.LEVEL_INSTANCE: [],
13744       locking.LEVEL_NODEGROUP: [self.group_uuid],
13745       }
13746
13747     self.share_locks[locking.LEVEL_INSTANCE] = 1
13748
13749   def DeclareLocks(self, level):
13750     if level == locking.LEVEL_INSTANCE:
13751       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13752
13753       # Lock instances optimistically, needs verification once group lock has
13754       # been acquired
13755       self.needed_locks[locking.LEVEL_INSTANCE] = \
13756           self.cfg.GetNodeGroupInstances(self.group_uuid)
13757
13758   def CheckPrereq(self):
13759     """Check prerequisites.
13760
13761     """
13762     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13763
13764     # Check if locked instances are still correct
13765     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13766
13767     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13768     cluster = self.cfg.GetClusterInfo()
13769
13770     if self.group is None:
13771       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13772                                (self.op.group_name, self.group_uuid))
13773
13774     if self.op.ndparams:
13775       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13776       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13777       self.new_ndparams = new_ndparams
13778
13779     if self.op.diskparams:
13780       self.new_diskparams = dict()
13781       for templ in constants.DISK_TEMPLATES:
13782         if templ not in self.op.diskparams:
13783           self.op.diskparams[templ] = {}
13784         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13785                                              self.op.diskparams[templ])
13786         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13787         self.new_diskparams[templ] = new_templ_params
13788
13789     if self.op.hv_state:
13790       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13791                                                  self.group.hv_state_static)
13792
13793     if self.op.disk_state:
13794       self.new_disk_state = \
13795         _MergeAndVerifyDiskState(self.op.disk_state,
13796                                  self.group.disk_state_static)
13797
13798     if self.op.ipolicy:
13799       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13800                                             self.op.ipolicy,
13801                                             group_policy=True)
13802
13803       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13804       inst_filter = lambda inst: inst.name in owned_instances
13805       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13806       violations = \
13807           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13808                                                                self.group),
13809                                         new_ipolicy, instances)
13810
13811       if violations:
13812         self.LogWarning("After the ipolicy change the following instances"
13813                         " violate them: %s",
13814                         utils.CommaJoin(violations))
13815
13816   def BuildHooksEnv(self):
13817     """Build hooks env.
13818
13819     """
13820     return {
13821       "GROUP_NAME": self.op.group_name,
13822       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13823       }
13824
13825   def BuildHooksNodes(self):
13826     """Build hooks nodes.
13827
13828     """
13829     mn = self.cfg.GetMasterNode()
13830     return ([mn], [mn])
13831
13832   def Exec(self, feedback_fn):
13833     """Modifies the node group.
13834
13835     """
13836     result = []
13837
13838     if self.op.ndparams:
13839       self.group.ndparams = self.new_ndparams
13840       result.append(("ndparams", str(self.group.ndparams)))
13841
13842     if self.op.diskparams:
13843       self.group.diskparams = self.new_diskparams
13844       result.append(("diskparams", str(self.group.diskparams)))
13845
13846     if self.op.alloc_policy:
13847       self.group.alloc_policy = self.op.alloc_policy
13848
13849     if self.op.hv_state:
13850       self.group.hv_state_static = self.new_hv_state
13851
13852     if self.op.disk_state:
13853       self.group.disk_state_static = self.new_disk_state
13854
13855     if self.op.ipolicy:
13856       self.group.ipolicy = self.new_ipolicy
13857
13858     self.cfg.Update(self.group, feedback_fn)
13859     return result
13860
13861
class LUGroupRemove(LogicalUnit):
  """Logical unit removing an empty node group from the configuration.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group's UUID and request the lock on the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODEGROUP] = [group_uuid]

  def CheckPrereq(self):
    """Check prerequisites.

    The group can only be removed if no node is assigned to it and if it is
    not the only node group of the cluster.

    @raise errors.OpPrereqError: if one of the conditions is not met

    """
    # Collect the nodes still assigned to the group
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      node_names = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, node_names),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build the environment for the hooks.

    @rtype: dict
    @return: the name of the group being removed

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build the node lists for the hooks.

    @rtype: tuple
    @return: a pair of lists, each one holding only the master node

    """
    master = self.cfg.GetMasterNode()
    first = [master]
    second = [master]
    return (first, second)

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    # The lock of the removed group has to go as well
    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
13927
13928
class LUGroupRename(LogicalUnit):
  """Logical unit giving a node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group's UUID and request the lock on the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid

    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODEGROUP] = [group_uuid]

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure no node group carries the requested new name yet.

    @raise errors.OpPrereqError: if the new name is already in use

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build the environment for the hooks.

    @rtype: dict
    @return: the old and the new name of the group

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build the node lists for the hooks.

    @rtype: tuple
    @return: the same list twice: the master node followed by the names of
      the other nodes belonging to the renamed group

    """
    master = self.cfg.GetMasterNode()

    # Drop the master's entry, it is always put first below
    others = self.cfg.GetAllNodesInfo()
    others.pop(master, None)

    hook_nodes = [master]
    for node in others.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: passed on to the configuration update
    @return: the new name of the group
    @raise errors.OpExecError: if the group can't be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
13996
13997
class LUGroupEvacuate(LogicalUnit):
  """Logical unit evacuating the instances of a node group.

  The instances of the group are handed to an iallocator run in "change
  group" mode; the jobs computed from its result are returned to the caller
  instead of being executed by this LU itself.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group names and prepare the lock declarations.

    All locks are acquired in shared mode; the actual lock names are filled
    in level by level in L{DeclareLocks}.

    @raise errors.OpPrereqError: if the group to be evacuated is also given
      as a target group

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    # The lists are intentionally empty, see DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks against the configuration and
    determines the target groups (C{self.target_uuids}).

    @raise errors.OpPrereqError: if no target group was requested and no
      group other than the evacuated one is owned

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name and the space-separated UUIDs of the target
      groups

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list twice: the master node followed by the members of
      the group to be evacuated

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Run the iallocator and return the jobs evacuating the group.

    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    # Turn the iallocator's result into a list of jobs
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
14152
14153
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Common base of the logical units working on tags.

  This class is abstract; all the other tags LUs derive from it.

  """
  def ExpandNames(self):
    """Expand the name of the tagged object and declare its lock.

    For nodes and instances the name is expanded and a lock on the object is
    requested; for node groups only the UUID is looked up.

    """
    kind = self.op.kind

    self.group_uuid = None
    self.needed_locks = {}

    if kind == constants.TAG_NODE:
      node_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = node_name
      self.needed_locks[locking.LEVEL_NODE] = node_name
    elif kind == constants.TAG_INSTANCE:
      instance_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = instance_name
      self.needed_locks[locking.LEVEL_INSTANCE] = instance_name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the object the tags belong to and stores it as C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not known

    """
    cfg = self.cfg
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
14190
14191
class LUTagsGet(TagsLU):
  """Logical unit returning the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks as in the base class, but acquire them shared.

    """
    TagsLU.ExpandNames(self)

    # Reading the tags doesn't modify anything, shared locks are enough
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
14209
14210
14211 class LUTagsSearch(NoHooksLU):
14212   """Searches the tags for a given pattern.
14213
14214   """
14215   REQ_BGL = False
14216
14217   def ExpandNames(self):
14218     self.needed_locks = {}
14219
14220   def CheckPrereq(self):
14221     """Check prerequisites.
14222
14223     This checks the pattern passed for validity by compiling it.
14224
14225     """
14226     try:
14227       self.re = re.compile(self.op.pattern)
14228     except re.error, err:
14229       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14230                                  (self.op.pattern, err), errors.ECODE_INVAL)
14231
14232   def Exec(self, feedback_fn):
14233     """Returns the tag list.
14234
14235     """
14236     cfg = self.cfg
14237     tgts = [("/cluster", cfg.GetClusterInfo())]
14238     ilist = cfg.GetAllInstancesInfo().values()
14239     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14240     nlist = cfg.GetAllNodesInfo().values()
14241     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14242     tgts.extend(("/nodegroup/%s" % n.name, n)
14243                 for n in cfg.GetAllNodeGroupsInfo().values())
14244     results = []
14245     for path, target in tgts:
14246       for tag in target.GetTags():
14247         if self.re.search(tag):
14248           results.append((path, tag))
14249     return results
14250
14251
14252 class LUTagsSet(TagsLU):
14253   """Sets a tag on a given object.
14254
14255   """
14256   REQ_BGL = False
14257
14258   def CheckPrereq(self):
14259     """Check prerequisites.
14260
14261     This checks the type and length of the tag name and value.
14262
14263     """
14264     TagsLU.CheckPrereq(self)
14265     for tag in self.op.tags:
14266       objects.TaggableObject.ValidateTag(tag)
14267
14268   def Exec(self, feedback_fn):
14269     """Sets the tag.
14270
14271     """
14272     try:
14273       for tag in self.op.tags:
14274         self.target.AddTag(tag)
14275     except errors.TagError, err:
14276       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14277     self.cfg.Update(self.target, feedback_fn)
14278
14279
class LUTagsDel(TagsLU):
  """Logical unit deleting a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the tags and makes sure that all of them are currently set on
    the object.

    @raise errors.OpPrereqError: if at least one tag is not set on the
      object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()

    missing = wanted - present
    if not missing:
      return

    quoted = ("'%s'" % name for name in sorted(missing))
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    @param feedback_fn: passed on to the configuration update

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
14312
14313
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a specified amount of time.

  The sleep is done on the master and/or on the given nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, if there is one, is expanded and locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node, node_result) in results.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero results in a single delay without any logging.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last))
      self._TestDelay()
14360
14361
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  Depending on the opcode, the client is notified at certain points of the
  LU's processing; each notification is sent using a temporary Unix socket
  and has to be confirmed by the client.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Timeout used while waiting for the connected client to confirm
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    After the client has connected, this function waits until the client
    sends data or closes the connection.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if the client doesn't connect or confirm in time

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is not needed anymore, no matter whether a
        # client has connected or not
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is written to the job's log using a dedicated log
    entry type.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # The phase decides which kind of exception reports a failure
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the calls of this function and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, notify the client and declare the lock.

    @raise errors.ProgrammerError: if L{CheckArguments} was not called
      before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Run the requested notifications, log messages and failure.

    @param feedback_fn: function used to send the test log messages
    @rtype: bool
    @return: True, unless a failure was requested
    @raise errors.ProgrammerError: if L{ExpandNames} was not called before
    @raise errors.OpExecError: if the opcode requested a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages before sending them
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
14496
14497
14498 class IAllocator(object):
14499   """IAllocator framework.
14500
14501   An IAllocator instance has three sets of attributes:
14502     - cfg that is needed to query the cluster
14503     - input data (all members of the _KEYS class attribute are required)
14504     - four buffer attributes (in|out_data|text), that represent the
14505       input (to the external script) in text and data structure format,
14506       and the output from it, again in two formats
14507     - the result variables from the script (success, info, nodes) for
14508       easy usage
14509
14510   """
14511   # pylint: disable=R0902
14512   # lots of instance attributes
14513
14514   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14515     self.cfg = cfg
14516     self.rpc = rpc_runner
14517     # init buffer variables
14518     self.in_text = self.out_text = self.in_data = self.out_data = None
14519     # init all input fields so that pylint is happy
14520     self.mode = mode
14521     self.memory = self.disks = self.disk_template = self.spindle_usage = None
14522     self.os = self.tags = self.nics = self.vcpus = None
14523     self.hypervisor = None
14524     self.relocate_from = None
14525     self.name = None
14526     self.instances = None
14527     self.evac_mode = None
14528     self.target_groups = []
14529     # computed fields
14530     self.required_nodes = None
14531     # init result fields
14532     self.success = self.info = self.result = None
14533
14534     try:
14535       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14536     except KeyError:
14537       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14538                                    " IAllocator" % self.mode)
14539
14540     keyset = [n for (n, _) in keydata]
14541
14542     for key in kwargs:
14543       if key not in keyset:
14544         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14545                                      " IAllocator" % key)
14546       setattr(self, key, kwargs[key])
14547
14548     for key in keyset:
14549       if key not in kwargs:
14550         raise errors.ProgrammerError("Missing input parameter '%s' to"
14551                                      " IAllocator" % key)
14552     self._BuildInputData(compat.partial(fn, self), keydata)
14553
14554   def _ComputeClusterData(self):
14555     """Compute the generic allocator input data.
14556
14557     This is the data that is independent of the actual operation.
14558
14559     """
14560     cfg = self.cfg
14561     cluster_info = cfg.GetClusterInfo()
14562     # cluster data
14563     data = {
14564       "version": constants.IALLOCATOR_VERSION,
14565       "cluster_name": cfg.GetClusterName(),
14566       "cluster_tags": list(cluster_info.GetTags()),
14567       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14568       "ipolicy": cluster_info.ipolicy,
14569       }
14570     ninfo = cfg.GetAllNodesInfo()
14571     iinfo = cfg.GetAllInstancesInfo().values()
14572     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14573
14574     # node data
14575     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14576
14577     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14578       hypervisor_name = self.hypervisor
14579     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14580       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14581     else:
14582       hypervisor_name = cluster_info.primary_hypervisor
14583
14584     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14585                                         [hypervisor_name])
14586     node_iinfo = \
14587       self.rpc.call_all_instances_info(node_list,
14588                                        cluster_info.enabled_hypervisors)
14589
14590     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14591
14592     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14593     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14594                                                  i_list, config_ndata)
14595     assert len(data["nodes"]) == len(ninfo), \
14596         "Incomplete node data computed"
14597
14598     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14599
14600     self.in_data = data
14601
14602   @staticmethod
14603   def _ComputeNodeGroupData(cfg):
14604     """Compute node groups data.
14605
14606     """
14607     cluster = cfg.GetClusterInfo()
14608     ng = dict((guuid, {
14609       "name": gdata.name,
14610       "alloc_policy": gdata.alloc_policy,
14611       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14612       })
14613       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14614
14615     return ng
14616
14617   @staticmethod
14618   def _ComputeBasicNodeData(cfg, node_cfg):
14619     """Compute global node data.
14620
14621     @rtype: dict
14622     @returns: a dict of name: (node dict, node config)
14623
14624     """
14625     # fill in static (config-based) values
14626     node_results = dict((ninfo.name, {
14627       "tags": list(ninfo.GetTags()),
14628       "primary_ip": ninfo.primary_ip,
14629       "secondary_ip": ninfo.secondary_ip,
14630       "offline": ninfo.offline,
14631       "drained": ninfo.drained,
14632       "master_candidate": ninfo.master_candidate,
14633       "group": ninfo.group,
14634       "master_capable": ninfo.master_capable,
14635       "vm_capable": ninfo.vm_capable,
14636       "ndparams": cfg.GetNdParams(ninfo),
14637       })
14638       for ninfo in node_cfg.values())
14639
14640     return node_results
14641
14642   @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute the runtime node data and merge it with the static data.

    Nodes which are offline or drained keep their static data only.

    @param node_cfg: dict of node objects, indexed by node name
    @param node_data: per-node results of the node info RPC call
    @param node_iinfo: per-node results of the instance info RPC call
    @param i_list: list of (instance, backend parameters) tuples
    @param node_results: the basic node structures as filled from the config
    @rtype: dict
    @return: a copy of C{node_results} in which the entries of the queried
      nodes are extended by the runtime values
    @raise errors.OpExecError: if a node didn't return a needed attribute or
      returned a non-integer value for it

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        # All of these attributes are needed and have to be integers
        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            # Instances not reported by the node count as using no memory
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            # Memory the instance is configured for (maximum) but which it
            # doesn't use at the moment is not considered free
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # assemble the runtime data of the node
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        # The static values are merged on top of the runtime ones
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
14702
14703   @staticmethod
14704   def _ComputeInstanceData(cluster_info, i_list):
14705     """Compute global instance data.
14706
14707     """
14708     instance_data = {}
14709     for iinfo, beinfo in i_list:
14710       nic_data = []
14711       for nic in iinfo.nics:
14712         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14713         nic_dict = {
14714           "mac": nic.mac,
14715           "ip": nic.ip,
14716           "mode": filled_params[constants.NIC_MODE],
14717           "link": filled_params[constants.NIC_LINK],
14718           }
14719         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14720           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14721         nic_data.append(nic_dict)
14722       pir = {
14723         "tags": list(iinfo.GetTags()),
14724         "admin_state": iinfo.admin_state,
14725         "vcpus": beinfo[constants.BE_VCPUS],
14726         "memory": beinfo[constants.BE_MAXMEM],
14727         "spindle_usage": beinfo[constants.BE_SPINDLE_USAGE],
14728         "os": iinfo.os,
14729         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14730         "nics": nic_data,
14731         "disks": [{constants.IDISK_SIZE: dsk.size,
14732                    constants.IDISK_MODE: dsk.mode}
14733                   for dsk in iinfo.disks],
14734         "disk_template": iinfo.disk_template,
14735         "hypervisor": iinfo.hypervisor,
14736         }
14737       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14738                                                  pir["disks"])
14739       instance_data[iinfo.name] = pir
14740
14741     return instance_data
14742
14743   def _AddNewInstance(self):
14744     """Add new instance data to allocator structure.
14745
14746     This in combination with _AllocatorGetClusterData will create the
14747     correct structure needed as input for the allocator.
14748
14749     The checks for the completeness of the opcode must have already been
14750     done.
14751
14752     """
14753     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14754
14755     if self.disk_template in constants.DTS_INT_MIRROR:
14756       self.required_nodes = 2
14757     else:
14758       self.required_nodes = 1
14759
14760     request = {
14761       "name": self.name,
14762       "disk_template": self.disk_template,
14763       "tags": self.tags,
14764       "os": self.os,
14765       "vcpus": self.vcpus,
14766       "memory": self.memory,
14767       "spindle_usage": self.spindle_usage,
14768       "disks": self.disks,
14769       "disk_space_total": disk_space,
14770       "nics": self.nics,
14771       "required_nodes": self.required_nodes,
14772       "hypervisor": self.hypervisor,
14773       }
14774
14775     return request
14776
14777   def _AddRelocateInstance(self):
14778     """Add relocate instance data to allocator structure.
14779
14780     This in combination with _IAllocatorGetClusterData will create the
14781     correct structure needed as input for the allocator.
14782
14783     The checks for the completeness of the opcode must have already been
14784     done.
14785
14786     """
14787     instance = self.cfg.GetInstanceInfo(self.name)
14788     if instance is None:
14789       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14790                                    " IAllocator" % self.name)
14791
14792     if instance.disk_template not in constants.DTS_MIRRORED:
14793       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14794                                  errors.ECODE_INVAL)
14795
14796     if instance.disk_template in constants.DTS_INT_MIRROR and \
14797         len(instance.secondary_nodes) != 1:
14798       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14799                                  errors.ECODE_STATE)
14800
14801     self.required_nodes = 1
14802     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14803     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14804
14805     request = {
14806       "name": self.name,
14807       "disk_space_total": disk_space,
14808       "required_nodes": self.required_nodes,
14809       "relocate_from": self.relocate_from,
14810       }
14811     return request
14812
14813   def _AddNodeEvacuate(self):
14814     """Get data for node-evacuate requests.
14815
14816     """
14817     return {
14818       "instances": self.instances,
14819       "evac_mode": self.evac_mode,
14820       }
14821
14822   def _AddChangeGroup(self):
14823     """Get data for node-evacuate requests.
14824
14825     """
14826     return {
14827       "instances": self.instances,
14828       "target_groups": self.target_groups,
14829       }
14830
14831   def _BuildInputData(self, fn, keydata):
14832     """Build input data structures.
14833
14834     """
14835     self._ComputeClusterData()
14836
14837     request = fn()
14838     request["type"] = self.mode
14839     for keyname, keytype in keydata:
14840       if keyname not in request:
14841         raise errors.ProgrammerError("Request parameter %s is missing" %
14842                                      keyname)
14843       val = request[keyname]
14844       if not keytype(val):
14845         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14846                                      " validation, value %s, expected"
14847                                      " type %s" % (keyname, val, keytype))
14848     self.in_data["request"] = request
14849
14850     self.in_text = serializer.Dump(self.in_data)
14851
  # Check function accepting a list of strings
  _STRING_LIST = ht.TListOf(ht.TString)
  # Check function accepting a list of lists of dicts whose "OP_ID" value
  # must be one of the failover, migrate or replace-disks opcode IDs
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # List of three-item entries: two non-empty strings followed by a list of
  # non-empty strings
  # NOTE(review): presumably (instance, group, nodes) -- confirm against the
  # iallocator protocol documentation
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # List of two-item entries: a non-empty string and an optional string
  # NOTE(review): presumably (instance, failure reason) -- confirm
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # Complete result of the modes using it below: exactly three items, checked
  # in order by _NEVAC_MOVED, _NEVAC_FAILED and _JOB_LIST
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data, keyed by allocator mode; each value is a tuple of
  # (request-building function, list of (request key, check function) pairs,
  # check function for the allocator result)
  # NOTE(review): the code unpacking these tuples is outside this section;
  # the functions are stored unbound here, so the consumer has to bind them
  # to the instance before calling -- confirm in the constructor
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_usage", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14905
14906   def Run(self, name, validate=True, call_fn=None):
14907     """Run an instance allocator and return the results.
14908
14909     """
14910     if call_fn is None:
14911       call_fn = self.rpc.call_iallocator_runner
14912
14913     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14914     result.Raise("Failure while running the iallocator script")
14915
14916     self.out_text = result.payload
14917     if validate:
14918       self._ValidateResult()
14919
  def _ValidateResult(self):
    """Parse and check the allocator output stored in C{self.out_text}.

    On success the parsed dict is saved in C{self.out_data} and its
    "success", "info" and "result" entries are also set as attributes of
    the same name on this object.

    @raise errors.OpExecError: if the output can't be parsed, is not a
        dict, lacks one of the mandatory keys, fails the result check of
        the current mode or, for successful relocations, refers to node
        groups other than those of the request

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # Accept the "nodes" key in place of "result"; it is renamed in the
    # parsed dict so the code below only deals with "result"
    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    # The attributes are set one by one, hence a missing later key leaves
    # the earlier ones already assigned
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    # NOTE(review): self._result_check is assigned outside this method;
    # presumably the mode's result check function -- confirm
    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      # Map node names to group UUIDs based on the data sent to the script
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      # The primary node is added on both sides of the comparison
      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      # Only enforced if the script reported success
      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
14976
14977   @staticmethod
14978   def _NodesToGroups(node2group, groups, nodes):
14979     """Returns a list of unique group names for a list of nodes.
14980
14981     @type node2group: dict
14982     @param node2group: Map from node name to group UUID
14983     @type groups: dict
14984     @param groups: Group information
14985     @type nodes: list
14986     @param nodes: Node names
14987
14988     """
14989     result = set()
14990
14991     for node in nodes:
14992       try:
14993         group_uuid = node2group[node]
14994       except KeyError:
14995         # Ignore unknown node
14996         pass
14997       else:
14998         try:
14999           group = groups[group_uuid]
15000         except KeyError:
15001           # Can't find group, let's use UUID
15002           group_name = group_uuid
15003         else:
15004           group_name = group["name"]
15005
15006         result.add(group_name)
15007
15008     return sorted(result)
15009
15010
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction
  it either returns the input text generated for the allocator or runs the
  allocator and returns its unvalidated output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test. Instance names are expanded in place on the opcode and,
    for relocations, C{self.relocate_from} is set to the secondary nodes
    of the instance.

    @raise errors.OpPrereqError: if a parameter is missing or invalid, the
        instance to allocate already exists, or the mode or direction is
        unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must use a name not yet known to the cluster
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk needs an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # an allocator name is only needed if the script is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: not used by this LU
    @return: the generated allocator input text for direction
        C{constants.IALLOCATOR_DIR_IN}, otherwise the unvalidated output
        text of the allocator
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # the mode has to be interpolated into the message; passing it as a
      # second exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is wanted, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15114
15115
#: Query type implementations, keyed by query resource constant
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# Checked when the module is loaded: the resources with an implementation
# must be exactly those listed in constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15125
15126
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if there is no entry for C{name}

  """
  try:
    impl = _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return impl