4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
63 from ganeti import pathutils
64 from ganeti import vcluster
65 from ganeti import network
66 from ganeti.masterd import iallocator
68 import ganeti.masterd.instance # pylint: disable=W0611
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
74 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76 #: Instance status in which an instance can be marked as offline/online
77 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
78 constants.ADMINST_OFFLINE,
83 """Data container for LU results with jobs.
85 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
86 by L{mcpu._ProcessResult}. The latter will then submit the jobs
87 contained in the C{jobs} attribute and include the job IDs in the opcode
91 def __init__(self, jobs, **kwargs):
92 """Initializes this class.
94 Additional return values can be specified as keyword arguments.
96     @type jobs: list of lists of L{opcodes.OpCode}
97 @param jobs: A list of lists of opcode objects
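# Usage sketch (not part of the original module): an LU's Exec method can hand
# follow-up work to the job queue by returning something like
#
#   return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=name)]
#                          for name in affected_names])
#
# where each inner list becomes one job submitted after this LU finishes;
# "affected_names" is a hypothetical variable used only for illustration.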
104 class LogicalUnit(object):
105 """Logical Unit base class.
107 Subclasses must follow these rules:
108 - implement ExpandNames
109 - implement CheckPrereq (except when tasklets are used)
110 - implement Exec (except when tasklets are used)
111 - implement BuildHooksEnv
112 - implement BuildHooksNodes
113 - redefine HPATH and HTYPE
114 - optionally redefine their run requirements:
115 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
117 Note that all commands require root permissions.
119 @ivar dry_run_result: the value (if any) that will be returned to the caller
120 in dry-run mode (signalled by opcode dry_run parameter)
127 def __init__(self, processor, op, context, rpc_runner):
128 """Constructor for LogicalUnit.
130 This needs to be overridden in derived classes in order to check op
134 self.proc = processor
136 self.cfg = context.cfg
137 self.glm = context.glm
139 self.owned_locks = context.glm.list_owned
140 self.context = context
141 self.rpc = rpc_runner
142 # Dicts used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
146 self.remove_locks = {}
147 # Used to force good behavior when calling helper functions
148 self.recalculate_locks = {}
150 self.Log = processor.Log # pylint: disable=C0103
151 self.LogWarning = processor.LogWarning # pylint: disable=C0103
152 self.LogInfo = processor.LogInfo # pylint: disable=C0103
153 self.LogStep = processor.LogStep # pylint: disable=C0103
154 # support for dry-run
155 self.dry_run_result = None
156 # support for generic debug attribute
157 if (not hasattr(self.op, "debug_level") or
158 not isinstance(self.op.debug_level, int)):
159 self.op.debug_level = 0
164 # Validate opcode parameters and set defaults
165 self.op.Validate(True)
167 self.CheckArguments()
169 def CheckArguments(self):
170 """Check syntactic validity for the opcode arguments.
172     This method is for doing a simple syntactic check and ensuring the
173 validity of opcode parameters, without any cluster-related
174 checks. While the same can be accomplished in ExpandNames and/or
175     CheckPrereq, doing these separately is better because:
177       - ExpandNames is left as purely a lock-related function
178       - CheckPrereq is run after we have acquired locks (and possibly
181 The function is allowed to change the self.op attribute so that
182     later methods need no longer worry about missing parameters.
187 def ExpandNames(self):
188 """Expand names for this LU.
190 This method is called before starting to execute the opcode, and it should
191 update all the parameters of the opcode to their canonical form (e.g. a
192 short node name must be fully expanded after this method has successfully
193 completed). This way locking, hooks, logging, etc. can work correctly.
195 LUs which implement this method must also populate the self.needed_locks
196 member, as a dict with lock levels as keys, and a list of needed lock names
199 - use an empty dict if you don't need any lock
200 - if you don't need any lock at a particular level omit that
201 level (note that in this case C{DeclareLocks} won't be called
202 at all for that level)
203 - if you need locks at a level, but you can't calculate it in
204 this function, initialise that level with an empty list and do
205 further processing in L{LogicalUnit.DeclareLocks} (see that
206 function's docstring)
207 - don't put anything for the BGL level
208 - if you want all locks at a level use L{locking.ALL_SET} as a value
210 If you need to share locks (rather than acquire them exclusively) at one
211 level you can modify self.share_locks, setting a true value (usually 1) for
212 that level. By default locks are not shared.
214 This function can also define a list of tasklets, which then will be
215 executed in order instead of the usual LU-level CheckPrereq and Exec
216 functions, if those are not defined by the LU.
220 # Acquire all nodes and one instance
221 self.needed_locks = {
222 locking.LEVEL_NODE: locking.ALL_SET,
223 locking.LEVEL_INSTANCE: ['instance1.example.com'],
225 # Acquire just two nodes
226 self.needed_locks = {
227 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
230 self.needed_locks = {} # No, you can't leave it to the default value None
233 # The implementation of this method is mandatory only if the new LU is
234 # concurrent, so that old LUs don't need to be changed all at the same
237 self.needed_locks = {} # Exclusive LUs don't need locks.
239 raise NotImplementedError
241 def DeclareLocks(self, level):
242 """Declare LU locking needs for a level
244 While most LUs can just declare their locking needs at ExpandNames time,
245 sometimes there's the need to calculate some locks after having acquired
246 the ones before. This function is called just before acquiring locks at a
247 particular level, but after acquiring the ones at lower levels, and permits
248 such calculations. It can be used to modify self.needed_locks, and by
249 default it does nothing.
251 This function is only called if you have something already set in
252 self.needed_locks for the level.
254 @param level: Locking level which is going to be locked
255 @type level: member of L{ganeti.locking.LEVELS}
259 def CheckPrereq(self):
260 """Check prerequisites for this LU.
262 This method should check that the prerequisites for the execution
263 of this LU are fulfilled. It can do internode communication, but
264 it should be idempotent - no cluster or system changes are
267 The method should raise errors.OpPrereqError in case something is
268 not fulfilled. Its return value is ignored.
270 This method should also update all the parameters of the opcode to
271 their canonical form if it hasn't been done by ExpandNames before.
274 if self.tasklets is not None:
275 for (idx, tl) in enumerate(self.tasklets):
276 logging.debug("Checking prerequisites for tasklet %s/%s",
277 idx + 1, len(self.tasklets))
282 def Exec(self, feedback_fn):
285 This method should implement the actual work. It should raise
286 errors.OpExecError for failures that are somewhat dealt with in
290 if self.tasklets is not None:
291 for (idx, tl) in enumerate(self.tasklets):
292 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
295 raise NotImplementedError
297 def BuildHooksEnv(self):
298 """Build hooks environment for this LU.
301 @return: Dictionary containing the environment that will be used for
302 running the hooks for this LU. The keys of the dict must not be prefixed
303 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
304 will extend the environment with additional variables. If no environment
305 should be defined, an empty dictionary should be returned (not C{None}).
306 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
310 raise NotImplementedError
312 def BuildHooksNodes(self):
313 """Build list of nodes to run LU's hooks.
315 @rtype: tuple; (list, list)
316 @return: Tuple containing a list of node names on which the hook
317 should run before the execution and a list of node names on which the
318       hook should run after the execution. If no nodes are needed, an empty
319       list should be returned (and not None).
320 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
324 raise NotImplementedError
326 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
327 """Notify the LU about the results of its hooks.
329 This method is called every time a hooks phase is executed, and notifies
330 the Logical Unit about the hooks' result. The LU can then use it to alter
331 its result based on the hooks. By default the method does nothing and the
332 previous result is passed back unchanged but any LU can define it if it
333 wants to use the local cluster hook-scripts somehow.
335 @param phase: one of L{constants.HOOKS_PHASE_POST} or
336 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
337 @param hook_results: the results of the multi-node hooks rpc call
338     @param feedback_fn: function used to send feedback back to the caller
339 @param lu_result: the previous Exec result this LU had, or None
341 @return: the new Exec result, based on the previous result
345     # API must be kept, thus we ignore the 'unused argument' and 'could
346     # be a function' warnings
347 # pylint: disable=W0613,R0201
350 def _ExpandAndLockInstance(self):
351 """Helper function to expand and lock an instance.
353 Many LUs that work on an instance take its name in self.op.instance_name
354 and need to expand it and then declare the expanded name for locking. This
355 function does it, and then updates self.op.instance_name to the expanded
356 name. It also initializes needed_locks as a dict, if this hasn't been done
360 if self.needed_locks is None:
361 self.needed_locks = {}
363 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
364 "_ExpandAndLockInstance called with instance-level locks set"
365 self.op.instance_name = _ExpandInstanceName(self.cfg,
366 self.op.instance_name)
367 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
369 def _LockInstancesNodes(self, primary_only=False,
370 level=locking.LEVEL_NODE):
371 """Helper function to declare instances' nodes for locking.
373 This function should be called after locking one or more instances to lock
374 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
375 with all primary or secondary nodes for instances already locked and
376 present in self.needed_locks[locking.LEVEL_INSTANCE].
378 It should be called from DeclareLocks, and for safety only works if
379 self.recalculate_locks[locking.LEVEL_NODE] is set.
381 In the future it may grow parameters to just lock some instance's nodes, or
382 to just lock primaries or secondary nodes, if needed.
384     It should be called in DeclareLocks in a way similar to::
386 if level == locking.LEVEL_NODE:
387 self._LockInstancesNodes()
389 @type primary_only: boolean
390 @param primary_only: only lock primary nodes of locked instances
391 @param level: Which lock level to use for locking nodes
394 assert level in self.recalculate_locks, \
395 "_LockInstancesNodes helper function called with no nodes to recalculate"
397     # TODO: check if we've really been called with the instance locks held
399 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
400 # future we might want to have different behaviors depending on the value
401 # of self.recalculate_locks[locking.LEVEL_NODE]
403 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
404 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
405 wanted_nodes.append(instance.primary_node)
407 wanted_nodes.extend(instance.secondary_nodes)
409 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
410 self.needed_locks[level] = wanted_nodes
411 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
412 self.needed_locks[level].extend(wanted_nodes)
414 raise errors.ProgrammerError("Unknown recalculation mode")
416 del self.recalculate_locks[level]
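# Illustrative sketch (not part of the original module): a minimal LU showing
# how ExpandNames, DeclareLocks and _LockInstancesNodes cooperate. The class
# name and the opcode's instance_name field are hypothetical; hooks-related
# attributes and methods are omitted for brevity.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  def ExpandNames(self):
    # Lock the instance itself; its node locks are computed in DeclareLocks
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Replace the empty list with the locked instance's nodes
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s is locked together with its nodes" %
                self.instance.name)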
419 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
420 """Simple LU which runs no hooks.
422 This LU is intended as a parent for other LogicalUnits which will
423 run no hooks, in order to reduce duplicate code.
429 def BuildHooksEnv(self):
430 """Empty BuildHooksEnv for NoHooksLu.
432 This just raises an error.
435 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
437 def BuildHooksNodes(self):
438 """Empty BuildHooksNodes for NoHooksLU.
441 raise AssertionError("BuildHooksNodes called for NoHooksLU")
445 """Tasklet base class.
447 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
448   they can mix legacy code with tasklets. Locking needs to be done in the LU;
449 tasklets know nothing about locks.
451 Subclasses must follow these rules:
452 - Implement CheckPrereq
456 def __init__(self, lu):
463 def CheckPrereq(self):
464 """Check prerequisites for this tasklets.
466 This method should check whether the prerequisites for the execution of
467 this tasklet are fulfilled. It can do internode communication, but it
468 should be idempotent - no cluster or system changes are allowed.
470 The method should raise errors.OpPrereqError in case something is not
471 fulfilled. Its return value is ignored.
473 This method should also update all parameters to their canonical form if it
474 hasn't been done before.
479 def Exec(self, feedback_fn):
480 """Execute the tasklet.
482 This method should implement the actual work. It should raise
483 errors.OpExecError for failures that are somewhat dealt with in code, or
487 raise NotImplementedError
491 """Base for query utility classes.
494 #: Attribute holding field definitions
500 def __init__(self, qfilter, fields, use_locking):
501 """Initializes this class.
504 self.use_locking = use_locking
506 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
507 namefield=self.SORT_FIELD)
508 self.requested_data = self.query.RequestedData()
509 self.names = self.query.RequestedNames()
511 # Sort only if no names were requested
512 self.sort_by_name = not self.names
514 self.do_locking = None
517 def _GetNames(self, lu, all_names, lock_level):
518 """Helper function to determine names asked for in the query.
522 names = lu.owned_locks(lock_level)
526 if self.wanted == locking.ALL_SET:
527 assert not self.names
528 # caller didn't specify names, so ordering is not important
529 return utils.NiceSort(names)
531 # caller specified names and we must keep the same order
533 assert not self.do_locking or lu.glm.is_owned(lock_level)
535 missing = set(self.wanted).difference(names)
537 raise errors.OpExecError("Some items were removed before retrieving"
538 " their data: %s" % missing)
540 # Return expanded names
543 def ExpandNames(self, lu):
544 """Expand names for this query.
546 See L{LogicalUnit.ExpandNames}.
549 raise NotImplementedError()
551 def DeclareLocks(self, lu, level):
552 """Declare locks for this query.
554 See L{LogicalUnit.DeclareLocks}.
557 raise NotImplementedError()
559 def _GetQueryData(self, lu):
560 """Collects all data for this query.
562 @return: Query data object
565 raise NotImplementedError()
567 def NewStyleQuery(self, lu):
568 """Collect data and execute query.
571 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
572 sort_by_name=self.sort_by_name)
574 def OldStyleQuery(self, lu):
575 """Collect data and execute query.
578 return self.query.OldStyleQuery(self._GetQueryData(lu),
579 sort_by_name=self.sort_by_name)
583 """Returns a dict declaring all lock levels shared.
586 return dict.fromkeys(locking.LEVELS, 1)
589 def _AnnotateDiskParams(instance, devs, cfg):
590 """Little helper wrapper to the rpc annotation method.
592 @param instance: The instance object
593 @type devs: List of L{objects.Disk}
594 @param devs: The root devices (not any of its children!)
595 @param cfg: The config object
596   @return: The annotated disk copies
597 @see L{rpc.AnnotateDiskParams}
600 return rpc.AnnotateDiskParams(instance.disk_template, devs,
601 cfg.GetInstanceDiskParams(instance))
604 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
606 """Checks if node groups for locked instances are still correct.
608 @type cfg: L{config.ConfigWriter}
609 @param cfg: Cluster configuration
610 @type instances: dict; string as key, L{objects.Instance} as value
611 @param instances: Dictionary, instance name as key, instance object as value
612 @type owned_groups: iterable of string
613 @param owned_groups: List of owned groups
614 @type owned_nodes: iterable of string
615 @param owned_nodes: List of owned nodes
616 @type cur_group_uuid: string or None
617 @param cur_group_uuid: Optional group UUID to check against instance's groups
620 for (name, inst) in instances.items():
621 assert owned_nodes.issuperset(inst.all_nodes), \
622 "Instance %s's nodes changed while we kept the lock" % name
624 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
626 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
627 "Instance %s has no node in group %s" % (name, cur_group_uuid)
630 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
632 """Checks if the owned node groups are still correct for an instance.
634 @type cfg: L{config.ConfigWriter}
635 @param cfg: The cluster configuration
636 @type instance_name: string
637 @param instance_name: Instance name
638 @type owned_groups: set or frozenset
639 @param owned_groups: List of currently owned node groups
640 @type primary_only: boolean
641 @param primary_only: Whether to check node groups for only the primary node
644 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
646 if not owned_groups.issuperset(inst_groups):
647 raise errors.OpPrereqError("Instance %s's node groups changed since"
648                                " locks were acquired, current groups are"
649                                " '%s', owning groups '%s'; retry the"
652 utils.CommaJoin(inst_groups),
653 utils.CommaJoin(owned_groups)),
659 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
660 """Checks if the instances in a node group are still correct.
662 @type cfg: L{config.ConfigWriter}
663 @param cfg: The cluster configuration
664 @type group_uuid: string
665 @param group_uuid: Node group UUID
666 @type owned_instances: set or frozenset
667 @param owned_instances: List of currently owned instances
670 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
671 if owned_instances != wanted_instances:
672 raise errors.OpPrereqError("Instances in node group '%s' changed since"
673 " locks were acquired, wanted '%s', have '%s';"
674 " retry the operation" %
676 utils.CommaJoin(wanted_instances),
677 utils.CommaJoin(owned_instances)),
680 return wanted_instances
683 def _SupportsOob(cfg, node):
684 """Tells if node supports OOB.
686 @type cfg: L{config.ConfigWriter}
687 @param cfg: The cluster configuration
688 @type node: L{objects.Node}
689 @param node: The node
690 @return: The OOB script if supported or an empty string otherwise
693 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
696 def _CopyLockList(names):
697 """Makes a copy of a list of lock names.
699 Handles L{locking.ALL_SET} correctly.
702 if names == locking.ALL_SET:
703 return locking.ALL_SET
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717   @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735   @raise errors.OpPrereqError: if the instances parameter is of the wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756   @type use_default: boolean
757   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758       values as 'to be deleted' values
759   @type use_none: boolean
760   @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
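# Illustrative usage sketch (added; not part of the original module), showing
# the update semantics described above with made-up hypervisor parameters.
def _ExampleGetUpdatedParams():
  """Illustrative sketch (not part of the original module).

  C{VALUE_DEFAULT} removes a key from the copy, other values overwrite or
  extend it, and the input dict is left untouched.

  """
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
  new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
                                "serial_console": True})
  # new == {"root_path": "/dev/xvda1", "serial_console": True}
  return new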
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of a instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804                                      " on the cluster" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @returns: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
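# Illustrative usage sketch (added; not part of the original module), showing
# the keep= form of _ReleaseLocks; the helper name is hypothetical.
def _ExampleKeepOnlyPrimaryNodeLock(lu, instance):
  """Illustrative sketch (not part of the original module).

  Releases every node lock the LU owns except the instance's primary node;
  passing C{names=} instead of C{keep=} would release exactly the listed
  locks.

  """
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])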
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
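# Example of the resulting mapping (made-up node, volume and instance names):
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}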
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
962 except Exception, err: # pylint: disable=W0703
963 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
966 def _CheckOutputFields(static, dynamic, selected):
967 """Checks whether all selected fields are valid.
969 @type static: L{utils.FieldSet}
970 @param static: static fields set
971 @type dynamic: L{utils.FieldSet}
972 @param dynamic: dynamic fields set
979 delta = f.NonMatching(selected)
981 raise errors.OpPrereqError("Unknown output fields selected: %s"
982 % ",".join(delta), errors.ECODE_INVAL)
985 def _CheckGlobalHvParams(params):
986 """Validates that given hypervisor params are not global ones.
988 This will ensure that instances don't get customised versions of
992 used_globals = constants.HVC_GLOBALS.intersection(params)
994 msg = ("The following hypervisor parameters are global and cannot"
995 " be customized at instance level, please modify them at"
996 " cluster level: %s" % utils.CommaJoin(used_globals))
997 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1000 def _CheckNodeOnline(lu, node, msg=None):
1001 """Ensure that a given node is online.
1003 @param lu: the LU on behalf of which we make the check
1004 @param node: the node to check
1005 @param msg: if passed, should be a message to replace the default one
1006 @raise errors.OpPrereqError: if the node is offline
1010 msg = "Can't use offline node"
1011 if lu.cfg.GetNodeInfo(node).offline:
1012 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1015 def _CheckNodeNotDrained(lu, node):
1016 """Ensure that a given node is not drained.
1018 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @raise errors.OpPrereqError: if the node is drained
1023 if lu.cfg.GetNodeInfo(node).drained:
1024 raise errors.OpPrereqError("Can't use drained node %s" % node,
1028 def _CheckNodeVmCapable(lu, node):
1029 """Ensure that a given node is vm capable.
1031 @param lu: the LU on behalf of which we make the check
1032 @param node: the node to check
1033 @raise errors.OpPrereqError: if the node is not vm capable
1036 if not lu.cfg.GetNodeInfo(node).vm_capable:
1037 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1041 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1042 """Ensure that a node supports a given OS.
1044 @param lu: the LU on behalf of which we make the check
1045 @param node: the node to check
1046 @param os_name: the OS to query about
1047 @param force_variant: whether to ignore variant errors
1048 @raise errors.OpPrereqError: if the node is not supporting the OS
1051 result = lu.rpc.call_os_get(node, os_name)
1052 result.Raise("OS '%s' not in supported OS list for node %s" %
1054 prereq=True, ecode=errors.ECODE_INVAL)
1055 if not force_variant:
1056 _CheckOSVariant(result.payload, os_name)
1059 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1060 """Ensure that a node has the given secondary ip.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the LU on behalf of which we make the check
1065 @param node: the node to check
1066 @type secondary_ip: string
1067 @param secondary_ip: the ip to check
1068 @type prereq: boolean
1069 @param prereq: whether to throw a prerequisite or an execute error
1070 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1071 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1074 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1075 result.Raise("Failure checking secondary ip on node %s" % node,
1076 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1077 if not result.payload:
1078 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1079 " please fix and re-run this command" % secondary_ip)
1081 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1083 raise errors.OpExecError(msg)
1086 def _GetClusterDomainSecret():
1087 """Reads the cluster domain secret.
1090 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1094 def _CheckInstanceState(lu, instance, req_states, msg=None):
1095 """Ensure that an instance is in one of the required states.
1097 @param lu: the LU on behalf of which we make the check
1098 @param instance: the instance to check
1099 @param msg: if passed, should be a message to replace the default one
1100 @raise errors.OpPrereqError: if the instance is not in the required state
1104 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1105 if instance.admin_state not in req_states:
1106 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1107 (instance.name, instance.admin_state, msg),
1110 if constants.ADMINST_UP not in req_states:
1111 pnode = instance.primary_node
1112 if not lu.cfg.GetNodeInfo(pnode).offline:
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1116 if instance.name in ins_l.payload:
1117 raise errors.OpPrereqError("Instance %s is running, %s" %
1118 (instance.name, msg), errors.ECODE_STATE)
1120 lu.LogWarning("Primary node offline, ignoring check that instance"
1124 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1125 """Computes if value is in the desired range.
1127 @param name: name of the parameter for which we perform the check
1128 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1130 @param ipolicy: dictionary containing min, max and std values
1131 @param value: actual value that we want to use
1132 @return: None or element not meeting the criteria
1136 if value in [None, constants.VALUE_AUTO]:
1138 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1139 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1140 if value > max_v or min_v > value:
1142 fqn = "%s/%s" % (name, qualifier)
1145 return ("%s value %s is not in range [%s, %s]" %
1146 (fqn, value, min_v, max_v))
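# Illustrative usage sketch (added; not part of the original module), showing
# the range check above with a minimal hand-built policy dict.
def _ExampleMinMaxSpecCheck():
  """Illustrative sketch (not part of the original module).

  With a policy allowing 1 to 8 VCPUs, a value of 4 passes (C{None} is
  returned) while 12 yields a violation message.

  """
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_CPU_COUNT: 1},
    constants.ISPECS_MAX: {constants.ISPEC_CPU_COUNT: 8},
    }
  assert _ComputeMinMaxSpec(constants.ISPEC_CPU_COUNT, "", ipolicy, 4) is None
  return _ComputeMinMaxSpec(constants.ISPEC_CPU_COUNT, "", ipolicy, 12)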
1150 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1151 nic_count, disk_sizes, spindle_use,
1152 _compute_fn=_ComputeMinMaxSpec):
1153 """Verifies ipolicy against provided specs.
1156 @param ipolicy: The ipolicy
1158 @param mem_size: The memory size
1159 @type cpu_count: int
1160 @param cpu_count: Used cpu cores
1161 @type disk_count: int
1162 @param disk_count: Number of disks used
1163 @type nic_count: int
1164 @param nic_count: Number of nics used
1165 @type disk_sizes: list of ints
1166 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1167 @type spindle_use: int
1168 @param spindle_use: The number of spindles this instance uses
1169 @param _compute_fn: The compute function (unittest only)
1170   @return: A list of violations, or an empty list if no violations are found
1173 assert disk_count == len(disk_sizes)
1176 (constants.ISPEC_MEM_SIZE, "", mem_size),
1177 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1178 (constants.ISPEC_DISK_COUNT, "", disk_count),
1179 (constants.ISPEC_NIC_COUNT, "", nic_count),
1180 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1181 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1182 for idx, d in enumerate(disk_sizes)]
1185 (_compute_fn(name, qualifier, ipolicy, value)
1186 for (name, qualifier, value) in test_settings))
1189 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1190 _compute_fn=_ComputeIPolicySpecViolation):
1191 """Compute if instance meets the specs of ipolicy.
1194 @param ipolicy: The ipolicy to verify against
1195 @type instance: L{objects.Instance}
1196 @param instance: The instance to verify
1197 @param _compute_fn: The function to verify ipolicy (unittest only)
1198 @see: L{_ComputeIPolicySpecViolation}
1201 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1202 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1203 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1204 disk_count = len(instance.disks)
1205 disk_sizes = [disk.size for disk in instance.disks]
1206 nic_count = len(instance.nics)
1208 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1209 disk_sizes, spindle_use)
1212 def _ComputeIPolicyInstanceSpecViolation(
1213 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1214 """Compute if instance specs meets the specs of ipolicy.
1217 @param ipolicy: The ipolicy to verify against
1218   @type instance_spec: dict
1219 @param instance_spec: The instance spec to verify
1220 @param _compute_fn: The function to verify ipolicy (unittest only)
1221 @see: L{_ComputeIPolicySpecViolation}
1224 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1225 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1226 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1227 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1228 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1229 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1231 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1232 disk_sizes, spindle_use)
1235 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1237 _compute_fn=_ComputeIPolicyInstanceViolation):
1238 """Compute if instance meets the specs of the new target group.
1240 @param ipolicy: The ipolicy to verify
1241 @param instance: The instance object to verify
1242 @param current_group: The current group of the instance
1243 @param target_group: The new group of the instance
1244 @param _compute_fn: The function to verify ipolicy (unittest only)
1245 @see: L{_ComputeIPolicySpecViolation}
1248 if current_group == target_group:
1251 return _compute_fn(ipolicy, instance)
1254 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1255 _compute_fn=_ComputeIPolicyNodeViolation):
1256 """Checks that the target node is correct in terms of instance policy.
1258 @param ipolicy: The ipolicy to verify
1259 @param instance: The instance object to verify
1260   @param node: The new node the instance will be relocated to
1261 @param ignore: Ignore violations of the ipolicy
1262 @param _compute_fn: The function to verify ipolicy (unittest only)
1263 @see: L{_ComputeIPolicySpecViolation}
1266 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1267 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1270 msg = ("Instance does not meet target node group's (%s) instance"
1271 " policy: %s") % (node.group, utils.CommaJoin(res))
1275 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1278 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1279 """Computes a set of any instances that would violate the new ipolicy.
1281 @param old_ipolicy: The current (still in-place) ipolicy
1282 @param new_ipolicy: The new (to become) ipolicy
1283 @param instances: List of instances to verify
1284   @return: A list of instances which violate the new ipolicy but
1288 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1289 _ComputeViolatingInstances(old_ipolicy, instances))
1292 def _ExpandItemName(fn, name, kind):
1293 """Expand an item name.
1295 @param fn: the function to use for expansion
1296 @param name: requested item name
1297 @param kind: text description ('Node' or 'Instance')
1298 @return: the resolved (full) name
1299 @raise errors.OpPrereqError: if the item is not found
1302 full_name = fn(name)
1303 if full_name is None:
1304 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1309 def _ExpandNodeName(cfg, name):
1310 """Wrapper over L{_ExpandItemName} for nodes."""
1311 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1314 def _ExpandInstanceName(cfg, name):
1315 """Wrapper over L{_ExpandItemName} for instance."""
1316 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1319 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1320 minmem, maxmem, vcpus, nics, disk_template, disks,
1321 bep, hvp, hypervisor_name, tags):
1322 """Builds instance related env variables for hooks
1324 This builds the hook environment from individual variables.
1327 @param name: the name of the instance
1328 @type primary_node: string
1329 @param primary_node: the name of the instance's primary node
1330 @type secondary_nodes: list
1331 @param secondary_nodes: list of secondary nodes as strings
1332 @type os_type: string
1333 @param os_type: the name of the instance's OS
1334 @type status: string
1335 @param status: the desired status of the instance
1336 @type minmem: string
1337 @param minmem: the minimum memory size of the instance
1338 @type maxmem: string
1339 @param maxmem: the maximum memory size of the instance
1341 @param vcpus: the count of VCPUs the instance has
1343 @param nics: list of tuples (ip, mac, mode, link, network) representing
1344 the NICs the instance has
1345 @type disk_template: string
1346 @param disk_template: the disk template of the instance
1348 @param disks: the list of (size, mode) pairs
1350 @param bep: the backend parameters for the instance
1352 @param hvp: the hypervisor parameters for the instance
1353 @type hypervisor_name: string
1354 @param hypervisor_name: the hypervisor for the instance
1356 @param tags: list of instance tags as strings
1358 @return: the hook environment for this instance
1363 "INSTANCE_NAME": name,
1364 "INSTANCE_PRIMARY": primary_node,
1365 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1366 "INSTANCE_OS_TYPE": os_type,
1367 "INSTANCE_STATUS": status,
1368 "INSTANCE_MINMEM": minmem,
1369 "INSTANCE_MAXMEM": maxmem,
1370 # TODO(2.7) remove deprecated "memory" value
1371 "INSTANCE_MEMORY": maxmem,
1372 "INSTANCE_VCPUS": vcpus,
1373 "INSTANCE_DISK_TEMPLATE": disk_template,
1374 "INSTANCE_HYPERVISOR": hypervisor_name,
1377 nic_count = len(nics)
1378 for idx, (ip, mac, mode, link, network) in enumerate(nics):
1381 env["INSTANCE_NIC%d_IP" % idx] = ip
1382 env["INSTANCE_NIC%d_MAC" % idx] = mac
1383 env["INSTANCE_NIC%d_MODE" % idx] = mode
1384 env["INSTANCE_NIC%d_LINK" % idx] = link
1385 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1386 if mode == constants.NIC_MODE_BRIDGED:
1387 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1391 env["INSTANCE_NIC_COUNT"] = nic_count
1394 disk_count = len(disks)
1395 for idx, (size, mode) in enumerate(disks):
1396 env["INSTANCE_DISK%d_SIZE" % idx] = size
1397 env["INSTANCE_DISK%d_MODE" % idx] = mode
1401 env["INSTANCE_DISK_COUNT"] = disk_count
1406 env["INSTANCE_TAGS"] = " ".join(tags)
1408 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1409 for key, value in source.items():
1410 env["INSTANCE_%s_%s" % (kind, key)] = value
1415 def _NICListToTuple(lu, nics):
1416 """Build a list of nic information tuples.
1418 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1419 value in LUInstanceQueryData.
1421 @type lu: L{LogicalUnit}
1422 @param lu: the logical unit on whose behalf we execute
1423 @type nics: list of L{objects.NIC}
1424 @param nics: list of nics to convert to hooks tuples
1428 cluster = lu.cfg.GetClusterInfo()
1432 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1433 mode = filled_params[constants.NIC_MODE]
1434 link = filled_params[constants.NIC_LINK]
1435 network = nic.network
1436 hooks_nics.append((ip, mac, mode, link, network))
1440 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1441 """Builds instance related env variables for hooks from an object.
1443 @type lu: L{LogicalUnit}
1444 @param lu: the logical unit on whose behalf we execute
1445 @type instance: L{objects.Instance}
1446 @param instance: the instance for which we should build the
1448 @type override: dict
1449 @param override: dictionary with key/values that will override
1452 @return: the hook environment dictionary
1455 cluster = lu.cfg.GetClusterInfo()
1456 bep = cluster.FillBE(instance)
1457 hvp = cluster.FillHV(instance)
1459 "name": instance.name,
1460 "primary_node": instance.primary_node,
1461 "secondary_nodes": instance.secondary_nodes,
1462 "os_type": instance.os,
1463 "status": instance.admin_state,
1464 "maxmem": bep[constants.BE_MAXMEM],
1465 "minmem": bep[constants.BE_MINMEM],
1466 "vcpus": bep[constants.BE_VCPUS],
1467 "nics": _NICListToTuple(lu, instance.nics),
1468 "disk_template": instance.disk_template,
1469 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1472 "hypervisor_name": instance.hypervisor,
1473 "tags": instance.tags,
1476 args.update(override)
1477 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1480 def _AdjustCandidatePool(lu, exceptions):
1481 """Adjust the candidate pool after node operations.
1484 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1486 lu.LogInfo("Promoted nodes to master candidate role: %s",
1487 utils.CommaJoin(node.name for node in mod_list))
1488 for name in mod_list:
1489 lu.context.ReaddNode(name)
1490 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1492 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1496 def _DecideSelfPromotion(lu, exceptions=None):
1497 """Decide whether I should promote myself as a master candidate.
1500 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1501 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1502   # the new node will increase mc_max by one, so:
1503 mc_should = min(mc_should + 1, cp_size)
1504 return mc_now < mc_should
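# Worked example (added; not from the original file): with candidate_pool_size
# = 10 and GetMasterCandidateStats reporting mc_now = 4, mc_should = 5, the
# node being added raises the target to min(5 + 1, 10) = 6; since 4 < 6 the
# method returns True and the new node promotes itself.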
1507 def _ComputeViolatingInstances(ipolicy, instances):
1508 """Computes a set of instances who violates given ipolicy.
1510 @param ipolicy: The ipolicy to verify
1511   @type instances: list of L{objects.Instance}
1512 @param instances: List of instances to verify
1513 @return: A frozenset of instance names violating the ipolicy
1516 return frozenset([inst.name for inst in instances
1517 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1520 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1521 """Check that the brigdes needed by a list of nics exist.
1524 cluster = lu.cfg.GetClusterInfo()
1525 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1526 brlist = [params[constants.NIC_LINK] for params in paramslist
1527 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529 result = lu.rpc.call_bridges_exist(target_node, brlist)
1530 result.Raise("Error checking bridges on destination node '%s'" %
1531 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1534 def _CheckInstanceBridgesExist(lu, instance, node=None):
1535 """Check that the brigdes needed by an instance exist.
1539 node = instance.primary_node
1540 _CheckNicsBridgesExist(lu, instance.nics, node)
1543 def _CheckOSVariant(os_obj, name):
1544 """Check whether an OS name conforms to the os variants specification.
1546 @type os_obj: L{objects.OS}
1547 @param os_obj: OS object to check
1549 @param name: OS name passed by the user, to check for validity
1552 variant = objects.OS.GetVariant(name)
1553 if not os_obj.supported_variants:
1555 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1556 " passed)" % (os_obj.name, variant),
1560 raise errors.OpPrereqError("OS name must include a variant",
1563 if variant not in os_obj.supported_variants:
1564 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1567 def _GetNodeInstancesInner(cfg, fn):
1568 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1571 def _GetNodeInstances(cfg, node_name):
1572 """Returns a list of all primary and secondary instances on a node.
1576 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1579 def _GetNodePrimaryInstances(cfg, node_name):
1580 """Returns primary instances on a node.
1583 return _GetNodeInstancesInner(cfg,
1584 lambda inst: node_name == inst.primary_node)
1587 def _GetNodeSecondaryInstances(cfg, node_name):
1588 """Returns secondary instances on a node.
1591 return _GetNodeInstancesInner(cfg,
1592 lambda inst: node_name in inst.secondary_nodes)
1595 def _GetStorageTypeArgs(cfg, storage_type):
1596 """Returns the arguments for a storage type.
1599 # Special case for file storage
1600 if storage_type == constants.ST_FILE:
1601 # storage.FileStorage wants a list of storage directories
1602 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1607 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1610 for dev in instance.disks:
1611 cfg.SetDiskID(dev, node_name)
1613 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1615 result.Raise("Failed to get disk status from node %s" % node_name,
1616 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1618 for idx, bdev_status in enumerate(result.payload):
1619 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1625 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1626 """Check the sanity of iallocator and node arguments and use the
1627 cluster-wide iallocator if appropriate.
1629 Check that at most one of (iallocator, node) is specified. If none is
1630 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1631 then the LU's opcode's iallocator slot is filled with the cluster-wide
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 ialloc = getattr(lu.op, iallocator_slot, None)
1645 if node is not None and ialloc is not None:
1646 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1648 elif ((node is None and ialloc is None) or
1649 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1650 default_iallocator = lu.cfg.GetDefaultIAllocator()
1651 if default_iallocator:
1652 setattr(lu.op, iallocator_slot, default_iallocator)
1654 raise errors.OpPrereqError("No iallocator or node given and no"
1655 " cluster-wide default iallocator found;"
1656 " please specify either an iallocator or a"
1657 " node, or set a cluster-wide default"
1658 " iallocator", errors.ECODE_INVAL)
1661 def _GetDefaultIAllocator(cfg, ialloc):
1662 """Decides on which iallocator to use.
1664 @type cfg: L{config.ConfigWriter}
1665 @param cfg: Cluster configuration object
1666 @type ialloc: string or None
1667 @param ialloc: Iallocator specified in opcode
1669 @return: Iallocator name
1673 # Use default iallocator
1674 ialloc = cfg.GetDefaultIAllocator()
1677 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678 " opcode nor as a cluster-wide default",
1684 def _CheckHostnameSane(lu, name):
1685 """Ensures that a given hostname resolves to a 'sane' name.
1687 The given name is required to be a prefix of the resolved hostname,
1688 to prevent accidental mismatches.
1690 @param lu: the logical unit on behalf of which we're checking
1691 @param name: the name we should resolve and check
1692 @return: the resolved hostname object
1695 hostname = netutils.GetHostname(name=name)
1696 if hostname.name != name:
1697 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1698 if not utils.MatchNameComponent(name, [hostname.name]):
1699 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1700 " same as given hostname '%s'") %
1701 (hostname.name, name), errors.ECODE_INVAL)
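# Example (added; made-up names): a request for "inst7" that resolves to
# "inst7.example.com" is accepted (the expansion is logged via LogInfo), while
# a resolution to "mail.example.com" raises OpPrereqError because the given
# name is not a prefix component of the resolved hostname.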
1705 class LUClusterPostInit(LogicalUnit):
1706 """Logical unit for running hooks after cluster initialization.
1709 HPATH = "cluster-init"
1710 HTYPE = constants.HTYPE_CLUSTER
1712 def BuildHooksEnv(self):
1717 "OP_TARGET": self.cfg.GetClusterName(),
1720 def BuildHooksNodes(self):
1721 """Build hooks nodes.
1724 return ([], [self.cfg.GetMasterNode()])
1726 def Exec(self, feedback_fn):
1733 class LUClusterDestroy(LogicalUnit):
1734 """Logical unit for destroying the cluster.
1737 HPATH = "cluster-destroy"
1738 HTYPE = constants.HTYPE_CLUSTER
1740 def BuildHooksEnv(self):
1745 "OP_TARGET": self.cfg.GetClusterName(),
1748 def BuildHooksNodes(self):
1749 """Build hooks nodes.
1754 def CheckPrereq(self):
1755 """Check prerequisites.
1757 This checks whether the cluster is empty.
1759 Any errors are signaled by raising errors.OpPrereqError.
1762 master = self.cfg.GetMasterNode()
1764 nodelist = self.cfg.GetNodeList()
1765 if len(nodelist) != 1 or nodelist[0] != master:
1766 raise errors.OpPrereqError("There are still %d node(s) in"
1767 " this cluster." % (len(nodelist) - 1),
1769 instancelist = self.cfg.GetInstanceList()
1771 raise errors.OpPrereqError("There are still %d instance(s) in"
1772 " this cluster." % len(instancelist),
1775 def Exec(self, feedback_fn):
1776 """Destroys the cluster.
1779 master_params = self.cfg.GetMasterNetworkParameters()
1781 # Run post hooks on master node before it's removed
1782 _RunPostHook(self, master_params.name)
1784 ems = self.cfg.GetUseExternalMipScript()
1785 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1788 self.LogWarning("Error disabling the master IP address: %s",
1791 return master_params.name
1794 def _VerifyCertificate(filename):
1795 """Verifies a certificate for L{LUClusterVerifyConfig}.
1797 @type filename: string
1798 @param filename: Path to PEM file
1802 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1803 utils.ReadFile(filename))
1804 except Exception, err: # pylint: disable=W0703
1805 return (LUClusterVerifyConfig.ETYPE_ERROR,
1806 "Failed to load X509 certificate %s: %s" % (filename, err))
1809 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1810 constants.SSL_CERT_EXPIRATION_ERROR)
1813 fnamemsg = "While verifying %s: %s" % (filename, msg)
1818 return (None, fnamemsg)
1819 elif errcode == utils.CERT_WARNING:
1820 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1821 elif errcode == utils.CERT_ERROR:
1822 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1824 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1827 def _GetAllHypervisorParameters(cluster, instances):
1828 """Compute the set of all hypervisor parameters.
1830 @type cluster: L{objects.Cluster}
1831 @param cluster: the cluster object
1832   @type instances: list of L{objects.Instance}
1833 @param instances: additional instances from which to obtain parameters
1834 @rtype: list of (origin, hypervisor, parameters)
1835 @return: a list with all parameters found, indicating the hypervisor they
1836 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1841 for hv_name in cluster.enabled_hypervisors:
1842 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1844 for os_name, os_hvp in cluster.os_hvp.items():
1845 for hv_name, hv_params in os_hvp.items():
1847 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1848 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1850 # TODO: collapse identical parameter values in a single one
1851 for instance in instances:
1852 if instance.hvparams:
1853 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1854 cluster.FillHV(instance)))
1859 class _VerifyErrors(object):
1860 """Mix-in for cluster/group verify LUs.
1862 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1863 self.op and self._feedback_fn to be available.)
1867 ETYPE_FIELD = "code"
1868 ETYPE_ERROR = "ERROR"
1869 ETYPE_WARNING = "WARNING"
1871 def _Error(self, ecode, item, msg, *args, **kwargs):
1872 """Format an error message.
1874 Based on the opcode's error_codes parameter, either format a
1875 parseable error code, or a simpler error string.
1877 This must be called only from Exec and functions called from Exec.
1880 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1881 itype, etxt, _ = ecode
1882 # first complete the msg
1885 # then format the whole message
1886 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1887 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1893 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1894 # and finally report it via the feedback_fn
1895 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1897 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1898 """Log an error message if the passed condition is True.
1902 or self.op.debug_simulate_errors) # pylint: disable=E1101
1904 # If the error code is in the list of ignored errors, demote the error to a
1906 (_, etxt, _) = ecode
1907 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1908 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1911 self._Error(ecode, *args, **kwargs)
1913 # do not mark the operation as failed for WARN cases only
1914 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1915 self.bad = self.bad or cond
1918 class LUClusterVerify(NoHooksLU):
1919 """Submits all jobs necessary to verify the cluster.
1924 def ExpandNames(self):
1925 self.needed_locks = {}
1927 def Exec(self, feedback_fn):
1930 if self.op.group_name:
1931 groups = [self.op.group_name]
1932 depends_fn = lambda: None
1934 groups = self.cfg.GetNodeGroupList()
1936 # Verify global configuration
1938 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1941 # Always depend on global verification
1942 depends_fn = lambda: [(-len(jobs), [])]
1945 [opcodes.OpClusterVerifyGroup(group_name=group,
1946 ignore_errors=self.op.ignore_errors,
1947 depends=depends_fn())]
1948 for group in groups)
1950 # Fix up all parameters
1951 for op in itertools.chain(*jobs): # pylint: disable=W0142
1952 op.debug_simulate_errors = self.op.debug_simulate_errors
1953 op.verbose = self.op.verbose
1954 op.error_codes = self.op.error_codes
1956 op.skip_checks = self.op.skip_checks
1957 except AttributeError:
1958 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1960 return ResultWithJobs(jobs)
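# When no specific group is requested, this submits one single-opcode job per
# entry, schematically (group names hypothetical):
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])], ...)]]
# Because jobs.extend() consumes the generator lazily, -len(jobs) always
# resolves back to the global configuration-verification job.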
1963 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1964 """Verifies the cluster config.
1969 def _VerifyHVP(self, hvp_data):
1970 """Verifies locally the syntax of the hypervisor parameters.
1973 for item, hv_name, hv_params in hvp_data:
1974 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1977 hv_class = hypervisor.GetHypervisor(hv_name)
1978 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1979 hv_class.CheckParameterSyntax(hv_params)
1980 except errors.GenericError, err:
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1983 def ExpandNames(self):
1984 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1985 self.share_locks = _ShareAll()
1987 def CheckPrereq(self):
1988 """Check prerequisites.
1991 # Retrieve all information
1992 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1993 self.all_node_info = self.cfg.GetAllNodesInfo()
1994 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1996 def Exec(self, feedback_fn):
1997 """Verify integrity of cluster, performing various test on nodes.
2001 self._feedback_fn = feedback_fn
2003 feedback_fn("* Verifying cluster config")
2005 for msg in self.cfg.VerifyConfig():
2006 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2008 feedback_fn("* Verifying cluster certificate files")
2010 for cert_filename in pathutils.ALL_CERT_FILES:
2011 (errcode, msg) = _VerifyCertificate(cert_filename)
2012 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2014 feedback_fn("* Verifying hypervisor parameters")
2016 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2017 self.all_inst_info.values()))
2019 feedback_fn("* Verifying all nodes belong to an existing group")
2021 # We do this verification here because, should this bogus circumstance
2022 # occur, it would never be caught by VerifyGroup, which only acts on
2023 # nodes/instances reachable from existing node groups.
2025 dangling_nodes = set(node.name for node in self.all_node_info.values()
2026 if node.group not in self.all_group_info)
2028 dangling_instances = {}
2029 no_node_instances = []
2031 for inst in self.all_inst_info.values():
2032 if inst.primary_node in dangling_nodes:
2033 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2034 elif inst.primary_node not in self.all_node_info:
2035 no_node_instances.append(inst.name)
2040 utils.CommaJoin(dangling_instances.get(node.name,
2042 for node in dangling_nodes]
2044 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2046 "the following nodes (and their instances) belong to a non"
2047 " existing group: %s", utils.CommaJoin(pretty_dangling))
2049 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2051 "the following instances have a non-existing primary-node:"
2052 " %s", utils.CommaJoin(no_node_instances))
2057 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2058 """Verifies the status of a node group.
2061 HPATH = "cluster-verify"
2062 HTYPE = constants.HTYPE_CLUSTER
2065 _HOOKS_INDENT_RE = re.compile("^", re.M)
2067 class NodeImage(object):
2068 """A class representing the logical and physical status of a node.
2071 @ivar name: the node name to which this object refers
2072 @ivar volumes: a structure as returned from
2073 L{ganeti.backend.GetVolumeList} (runtime)
2074 @ivar instances: a list of running instances (runtime)
2075 @ivar pinst: list of configured primary instances (config)
2076 @ivar sinst: list of configured secondary instances (config)
2077 @ivar sbp: dictionary of {primary-node: list of instances} for all
2078 instances for which this node is secondary (config)
2079 @ivar mfree: free memory, as reported by hypervisor (runtime)
2080 @ivar dfree: free disk, as reported by the node (runtime)
2081 @ivar offline: the offline status (config)
2082 @type rpc_fail: boolean
2083 @ivar rpc_fail: whether the RPC verify call failed (overall,
2084 not whether the individual keys were correct) (runtime)
2085 @type lvm_fail: boolean
2086 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2087 @type hyp_fail: boolean
2088 @ivar hyp_fail: whether the RPC call didn't return the instance list
2089 @type ghost: boolean
2090 @ivar ghost: whether this is a known node or not (config)
2091 @type os_fail: boolean
2092 @ivar os_fail: whether the RPC call didn't return valid OS data
2094 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2095 @type vm_capable: boolean
2096 @ivar vm_capable: whether the node can host instances
2099 def __init__(self, offline=False, name=None, vm_capable=True):
2108 self.offline = offline
2109 self.vm_capable = vm_capable
2110 self.rpc_fail = False
2111 self.lvm_fail = False
2112 self.hyp_fail = False
2114 self.os_fail = False
2117 def ExpandNames(self):
2118 # This raises errors.OpPrereqError on its own:
2119 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2121 # Get instances in node group; this is unsafe and needs verification later
2123 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2125 self.needed_locks = {
2126 locking.LEVEL_INSTANCE: inst_names,
2127 locking.LEVEL_NODEGROUP: [self.group_uuid],
2128 locking.LEVEL_NODE: [],
2131 self.share_locks = _ShareAll()
2133 def DeclareLocks(self, level):
2134 if level == locking.LEVEL_NODE:
2135 # Get members of node group; this is unsafe and needs verification later
2136 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2138 all_inst_info = self.cfg.GetAllInstancesInfo()
2140 # In Exec(), we warn about mirrored instances that have primary and
2141 # secondary living in separate node groups. To fully verify that
2142 # volumes for these instances are healthy, we will need to do an
2143 # extra call to their secondaries. We ensure here that those nodes will be locked.
2145 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2146 # Important: access only the instances whose lock is owned
2147 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2148 nodes.update(all_inst_info[inst].secondary_nodes)
2150 self.needed_locks[locking.LEVEL_NODE] = nodes
2152 def CheckPrereq(self):
2153 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2154 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2156 group_nodes = set(self.group_info.members)
2158 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2161 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2163 unlocked_instances = \
2164 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2167 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2168 utils.CommaJoin(unlocked_nodes),
2171 if unlocked_instances:
2172 raise errors.OpPrereqError("Missing lock for instances: %s" %
2173 utils.CommaJoin(unlocked_instances),
2176 self.all_node_info = self.cfg.GetAllNodesInfo()
2177 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2179 self.my_node_names = utils.NiceSort(group_nodes)
2180 self.my_inst_names = utils.NiceSort(group_instances)
2182 self.my_node_info = dict((name, self.all_node_info[name])
2183 for name in self.my_node_names)
2185 self.my_inst_info = dict((name, self.all_inst_info[name])
2186 for name in self.my_inst_names)
2188 # We detect here the nodes that will need the extra RPC calls for verifying
2189 # split LV volumes; they should be locked.
2190 extra_lv_nodes = set()
2192 for inst in self.my_inst_info.values():
2193 if inst.disk_template in constants.DTS_INT_MIRROR:
2194 for nname in inst.all_nodes:
2195 if self.all_node_info[nname].group != self.group_uuid:
2196 extra_lv_nodes.add(nname)
2198 unlocked_lv_nodes = \
2199 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2201 if unlocked_lv_nodes:
2202 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2203 utils.CommaJoin(unlocked_lv_nodes),
2205 self.extra_lv_nodes = list(extra_lv_nodes)
2207 def _VerifyNode(self, ninfo, nresult):
2208 """Perform some basic validation on data returned from a node.
2210 - check the result data structure is well formed and has all the
2212 - check ganeti version
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the results from the node
2218 @return: whether overall this call was successful (and we can expect
2219 reasonable values in the response)
2223 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2225 # main result, nresult should be a non-empty dict
2226 test = not nresult or not isinstance(nresult, dict)
2227 _ErrorIf(test, constants.CV_ENODERPC, node,
2228 "unable to verify node: no data returned")
2232 # compares ganeti version
2233 local_version = constants.PROTOCOL_VERSION
2234 remote_version = nresult.get("version", None)
2235 test = not (remote_version and
2236 isinstance(remote_version, (list, tuple)) and
2237 len(remote_version) == 2)
2238 _ErrorIf(test, constants.CV_ENODERPC, node,
2239 "connection to node returned invalid data")
2243 test = local_version != remote_version[0]
2244 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2245 "incompatible protocol versions: master %s,"
2246 " node %s", local_version, remote_version[0])
2250 # node seems compatible, we can actually try to look into its results
2252 # full package version
2253 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2254 constants.CV_ENODEVERSION, node,
2255 "software version mismatch: master %s, node %s",
2256 constants.RELEASE_VERSION, remote_version[1],
2257 code=self.ETYPE_WARNING)
2259 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2260 if ninfo.vm_capable and isinstance(hyp_result, dict):
2261 for hv_name, hv_result in hyp_result.iteritems():
2262 test = hv_result is not None
2263 _ErrorIf(test, constants.CV_ENODEHV, node,
2264 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2266 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2267 if ninfo.vm_capable and isinstance(hvp_result, list):
2268 for item, hv_name, hv_result in hvp_result:
2269 _ErrorIf(True, constants.CV_ENODEHV, node,
2270 "hypervisor %s parameter verify failure (source %s): %s",
2271 hv_name, item, hv_result)
2273 test = nresult.get(constants.NV_NODESETUP,
2274 ["Missing NODESETUP results"])
2275 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2280 def _VerifyNodeTime(self, ninfo, nresult,
2281 nvinfo_starttime, nvinfo_endtime):
2282 """Check the node time.
2284 @type ninfo: L{objects.Node}
2285 @param ninfo: the node to check
2286 @param nresult: the remote results for the node
2287 @param nvinfo_starttime: the start time of the RPC call
2288 @param nvinfo_endtime: the end time of the RPC call
2292 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294 ntime = nresult.get(constants.NV_TIME, None)
2296 ntime_merged = utils.MergeTime(ntime)
2297 except (ValueError, TypeError):
2298 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2301 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2302 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2303 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2304 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2308 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2309 "Node time diverges by at least %s from master node time",
2312 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2313 """Check the node LVM results.
2315 @type ninfo: L{objects.Node}
2316 @param ninfo: the node to check
2317 @param nresult: the remote results for the node
2318 @param vg_name: the configured VG name
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 # checks vg existence and size > 20G
2328 vglist = nresult.get(constants.NV_VGLIST, None)
2330 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2332 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2333 constants.MIN_VG_SIZE)
2334 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2337 pvlist = nresult.get(constants.NV_PVLIST, None)
2338 test = pvlist is None
2339 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2341 # check that ':' is not present in PV names, since it's a
2342 # special character for lvcreate (denotes the range of PEs to
2344 for _, pvname, owner_vg in pvlist:
2345 test = ":" in pvname
2346 _ErrorIf(test, constants.CV_ENODELVM, node,
2347 "Invalid character ':' in PV '%s' of VG '%s'",
2350 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2351 """Check the node bridges.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2356 @param bridges: the expected list of bridges
2363 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2365 missing = nresult.get(constants.NV_BRIDGES, None)
2366 test = not isinstance(missing, list)
2367 _ErrorIf(test, constants.CV_ENODENET, node,
2368 "did not return valid bridge information")
2370 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2371 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2373 def _VerifyNodeUserScripts(self, ninfo, nresult):
2374 """Check the results of user scripts presence and executability on the node
2376 @type ninfo: L{objects.Node}
2377 @param ninfo: the node to check
2378 @param nresult: the remote results for the node
2383 test = not constants.NV_USERSCRIPTS in nresult
2384 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2385 "did not return user scripts information")
2387 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2389 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2390 "user scripts not present or not executable: %s" %
2391 utils.CommaJoin(sorted(broken_scripts)))
2393 def _VerifyNodeNetwork(self, ninfo, nresult):
2394 """Check the node network connectivity results.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2402 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2404 test = constants.NV_NODELIST not in nresult
2405 _ErrorIf(test, constants.CV_ENODESSH, node,
2406 "node hasn't returned node ssh connectivity data")
2408 if nresult[constants.NV_NODELIST]:
2409 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2410 _ErrorIf(True, constants.CV_ENODESSH, node,
2411 "ssh communication with node '%s': %s", a_node, a_msg)
2413 test = constants.NV_NODENETTEST not in nresult
2414 _ErrorIf(test, constants.CV_ENODENET, node,
2415 "node hasn't returned node tcp connectivity data")
2417 if nresult[constants.NV_NODENETTEST]:
2418 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2420 _ErrorIf(True, constants.CV_ENODENET, node,
2421 "tcp communication with node '%s': %s",
2422 anode, nresult[constants.NV_NODENETTEST][anode])
2424 test = constants.NV_MASTERIP not in nresult
2425 _ErrorIf(test, constants.CV_ENODENET, node,
2426 "node hasn't returned node master IP reachability data")
2428 if not nresult[constants.NV_MASTERIP]:
2429 if node == self.master_node:
2430 msg = "the master node cannot reach the master IP (not configured?)"
2432 msg = "cannot reach the master IP"
2433 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2435 def _VerifyInstance(self, instance, instanceconfig, node_image,
2437 """Verify an instance.
2439 This function checks to see if the required block devices are
2440 available on the instance's node.
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444 node_current = instanceconfig.primary_node
2446 node_vol_should = {}
2447 instanceconfig.MapLVsByNode(node_vol_should)
2449 cluster = self.cfg.GetClusterInfo()
2450 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2452 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2453 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2455 for node in node_vol_should:
2456 n_img = node_image[node]
2457 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2458 # ignore missing volumes on offline or broken nodes
2460 for volume in node_vol_should[node]:
2461 test = volume not in n_img.volumes
2462 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2463 "volume %s missing on node %s", volume, node)
2465 if instanceconfig.admin_state == constants.ADMINST_UP:
2466 pri_img = node_image[node_current]
2467 test = instance not in pri_img.instances and not pri_img.offline
2468 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2469 "instance not running on its primary node %s",
2472 diskdata = [(nname, success, status, idx)
2473 for (nname, disks) in diskstatus.items()
2474 for idx, (success, status) in enumerate(disks)]
2476 for nname, success, bdev_status, idx in diskdata:
2477 # the 'ghost node' construction in Exec() ensures that we have a
2479 snode = node_image[nname]
2480 bad_snode = snode.ghost or snode.offline
2481 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2482 not success and not bad_snode,
2483 constants.CV_EINSTANCEFAULTYDISK, instance,
2484 "couldn't retrieve status for disk/%s on %s: %s",
2485 idx, nname, bdev_status)
2486 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2487 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2488 constants.CV_EINSTANCEFAULTYDISK, instance,
2489 "disk/%s on %s is faulty", idx, nname)
2491 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2492 """Verify if there are any unknown volumes in the cluster.
2494 The .os, .swap and backup volumes are ignored. All other volumes are
2495 reported as unknown.
2497 @type reserved: L{ganeti.utils.FieldSet}
2498 @param reserved: a FieldSet of reserved volume names
2501 for node, n_img in node_image.items():
2502 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2503 self.all_node_info[node].group != self.group_uuid):
2504 # skip non-healthy nodes
2506 for volume in n_img.volumes:
2507 test = ((node not in node_vol_should or
2508 volume not in node_vol_should[node]) and
2509 not reserved.Matches(volume))
2510 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2511 "volume %s is unknown", volume)
2513 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2514 """Verify N+1 Memory Resilience.
2516 Check that if one single node dies we can still start all the
2517 instances it was primary for.
2520 cluster_info = self.cfg.GetClusterInfo()
2521 for node, n_img in node_image.items():
2522 # This code checks that every node which is now listed as
2523 # secondary has enough memory to host all instances it is
2524 # supposed to should a single other node in the cluster fail.
2525 # FIXME: not ready for failover to an arbitrary node
2526 # FIXME: does not support file-backed instances
2527 # WARNING: we currently take into account down instances as well
2528 # as up ones, considering that even if they're down someone
2529 # might want to start them even in the event of a node failure.
2530 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2531 # we're skipping nodes marked offline and nodes in other groups from
2532 # the N+1 warning, since most likely we don't have good memory
2533 # information from them; we already list instances living on such
2534 # nodes, and that's enough warning
2536 #TODO(dynmem): also consider ballooning out other instances
2537 for prinode, instances in n_img.sbp.items():
2539 for instance in instances:
2540 bep = cluster_info.FillBE(instance_cfg[instance])
2541 if bep[constants.BE_AUTO_BALANCE]:
2542 needed_mem += bep[constants.BE_MINMEM]
2543 test = n_img.mfree < needed_mem
2544 self._ErrorIf(test, constants.CV_ENODEN1, node,
2545 "not enough memory to accomodate instance failovers"
2546 " should node %s fail (%dMiB needed, %dMiB available)",
2547 prinode, needed_mem, n_img.mfree)
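# Worked example (memory figures invented): if this node is secondary for two
# auto-balanced instances of prinode "nodeB" with BE_MINMEM of 2048 and 1024
# MiB, needed_mem is 3072; with mfree of 2500 MiB the node would be flagged
# with CV_ENODEN1 for "nodeB".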
2550 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2551 (files_all, files_opt, files_mc, files_vm)):
2552 """Verifies file checksums collected from all nodes.
2554 @param errorif: Callback for reporting errors
2555 @param nodeinfo: List of L{objects.Node} objects
2556 @param master_node: Name of master node
2557 @param all_nvinfo: RPC results
2560 # Define functions determining which nodes to consider for a file
2563 (files_mc, lambda node: (node.master_candidate or
2564 node.name == master_node)),
2565 (files_vm, lambda node: node.vm_capable),
2568 # Build mapping from filename to list of nodes which should have the file
2570 for (files, fn) in files2nodefn:
2572 filenodes = nodeinfo
2574 filenodes = filter(fn, nodeinfo)
2575 nodefiles.update((filename,
2576 frozenset(map(operator.attrgetter("name"), filenodes)))
2577 for filename in files)
2579 assert set(nodefiles) == (files_all | files_mc | files_vm)
2581 fileinfo = dict((filename, {}) for filename in nodefiles)
2582 ignore_nodes = set()
2584 for node in nodeinfo:
2586 ignore_nodes.add(node.name)
2589 nresult = all_nvinfo[node.name]
2591 if nresult.fail_msg or not nresult.payload:
2594 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2595 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2596 for (key, value) in fingerprints.items())
2599 test = not (node_files and isinstance(node_files, dict))
2600 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2601 "Node did not return file checksum data")
2603 ignore_nodes.add(node.name)
2606 # Build per-checksum mapping from filename to nodes having it
2607 for (filename, checksum) in node_files.items():
2608 assert filename in nodefiles
2609 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2611 for (filename, checksums) in fileinfo.items():
2612 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2614 # Nodes having the file
2615 with_file = frozenset(node_name
2616 for nodes in fileinfo[filename].values()
2617 for node_name in nodes) - ignore_nodes
2619 expected_nodes = nodefiles[filename] - ignore_nodes
2621 # Nodes missing file
2622 missing_file = expected_nodes - with_file
2624 if filename in files_opt:
2626 errorif(missing_file and missing_file != expected_nodes,
2627 constants.CV_ECLUSTERFILECHECK, None,
2628 "File %s is optional, but it must exist on all or no"
2629 " nodes (not found on %s)",
2630 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2632 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2633 "File %s is missing from node(s) %s", filename,
2634 utils.CommaJoin(utils.NiceSort(missing_file)))
2636 # Warn if a node has a file it shouldn't
2637 unexpected = with_file - expected_nodes
2639 constants.CV_ECLUSTERFILECHECK, None,
2640 "File %s should not exist on node(s) %s",
2641 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2643 # See if there are multiple versions of the file
2644 test = len(checksums) > 1
2646 variants = ["variant %s on %s" %
2647 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2648 for (idx, (checksum, nodes)) in
2649 enumerate(sorted(checksums.items()))]
2653 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2654 "File %s found with %s different checksums (%s)",
2655 filename, len(checksums), "; ".join(variants))
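# The bookkeeping above has, schematically, the shape (paths and checksums
# hypothetical):
#   fileinfo["/var/lib/ganeti/config.data"] = {"abc...": set(["node1"]),
#                                              "def...": set(["node2"])}
# and it is exactly a filename with more than one checksum key that triggers
# the "different checksums" error just emitted.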
2657 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2659 """Verifies and the node DRBD status.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
2664 @param instanceinfo: the dict of instances
2665 @param drbd_helper: the configured DRBD usermode helper
2666 @param drbd_map: the DRBD map as returned by
2667 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2671 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2675 test = (helper_result is None)
2676 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2677 "no drbd usermode helper returned")
2679 status, payload = helper_result
2681 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2682 "drbd usermode helper check unsuccessful: %s", payload)
2683 test = status and (payload != drbd_helper)
2684 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2685 "wrong drbd usermode helper: %s", payload)
2687 # compute the DRBD minors
2689 for minor, instance in drbd_map[node].items():
2690 test = instance not in instanceinfo
2691 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2692 "ghost instance '%s' in temporary DRBD map", instance)
2693 # ghost instance should not be running, but otherwise we
2694 # don't give double warnings (both ghost instance and
2695 # unallocated minor in use)
2697 node_drbd[minor] = (instance, False)
2699 instance = instanceinfo[instance]
2700 node_drbd[minor] = (instance.name,
2701 instance.admin_state == constants.ADMINST_UP)
2703 # and now check them
2704 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2705 test = not isinstance(used_minors, (tuple, list))
2706 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2707 "cannot parse drbd status file: %s", str(used_minors))
2709 # we cannot check drbd status
2712 for minor, (iname, must_exist) in node_drbd.items():
2713 test = minor not in used_minors and must_exist
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "drbd minor %d of instance %s is not active", minor, iname)
2716 for minor in used_minors:
2717 test = minor not in node_drbd
2718 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2719 "unallocated drbd minor %d is in use", minor)
2721 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2722 """Builds the node OS structures.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2731 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2733 remote_os = nresult.get(constants.NV_OSLIST, None)
2734 test = (not isinstance(remote_os, list) or
2735 not compat.all(isinstance(v, list) and len(v) == 7
2736 for v in remote_os))
2738 _ErrorIf(test, constants.CV_ENODEOS, node,
2739 "node hasn't returned valid OS data")
2748 for (name, os_path, status, diagnose,
2749 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2751 if name not in os_dict:
2754 # parameters is a list of lists instead of list of tuples due to
2755 # JSON lacking a real tuple type, fix it:
2756 parameters = [tuple(v) for v in parameters]
2757 os_dict[name].append((os_path, status, diagnose,
2758 set(variants), set(parameters), set(api_ver)))
2760 nimg.oslist = os_dict
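# After this, nimg.oslist maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples, e.g.
# (values illustrative):
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# More than one entry per name means duplicate OS definitions on the node.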
2762 def _VerifyNodeOS(self, ninfo, nimg, base):
2763 """Verifies the node OS list.
2765 @type ninfo: L{objects.Node}
2766 @param ninfo: the node to check
2767 @param nimg: the node image object
2768 @param base: the 'template' node we match against (e.g. from the master)
2772 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2774 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2776 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2777 for os_name, os_data in nimg.oslist.items():
2778 assert os_data, "Empty OS status for OS %s?!" % os_name
2779 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2780 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2781 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2782 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2783 "OS '%s' has multiple entries (first one shadows the rest): %s",
2784 os_name, utils.CommaJoin([v[0] for v in os_data]))
2785 # comparisons with the 'base' image
2786 test = os_name not in base.oslist
2787 _ErrorIf(test, constants.CV_ENODEOS, node,
2788 "Extra OS %s not present on reference node (%s)",
2792 assert base.oslist[os_name], "Base node has empty OS status?"
2793 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2795 # base OS is invalid, skipping
2797 for kind, a, b in [("API version", f_api, b_api),
2798 ("variants list", f_var, b_var),
2799 ("parameters", beautify_params(f_param),
2800 beautify_params(b_param))]:
2801 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2802 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2803 kind, os_name, base.name,
2804 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2806 # check any missing OSes
2807 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2808 _ErrorIf(missing, constants.CV_ENODEOS, node,
2809 "OSes present on reference node %s but missing on this node: %s",
2810 base.name, utils.CommaJoin(missing))
2812 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2813 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @type is_master: bool
2819 @param is_master: Whether node is the master node
2825 (constants.ENABLE_FILE_STORAGE or
2826 constants.ENABLE_SHARED_FILE_STORAGE)):
2828 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2830 # This should never happen
2831 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2832 "Node did not return forbidden file storage paths")
2834 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2835 "Found forbidden file storage paths: %s",
2836 utils.CommaJoin(fspaths))
2838 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2839 constants.CV_ENODEFILESTORAGEPATHS, node,
2840 "Node should not have returned forbidden file storage"
2843 def _VerifyOob(self, ninfo, nresult):
2844 """Verifies out of band functionality of a node.
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2852 # We just have to verify the paths on master and/or master candidates
2853 # as the oob helper is invoked on the master
2854 if ((ninfo.master_candidate or ninfo.master_capable) and
2855 constants.NV_OOB_PATHS in nresult):
2856 for path_result in nresult[constants.NV_OOB_PATHS]:
2857 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2859 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2860 """Verifies and updates the node volume data.
2862 This function will update a L{NodeImage}'s internal structures
2863 with data from the remote call.
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 nimg.lvm_fail = True
2876 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2879 elif isinstance(lvdata, basestring):
2880 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2881 utils.SafeEncode(lvdata))
2882 elif not isinstance(lvdata, dict):
2883 _ErrorIf(True, constants.CV_ENODELVM, node,
2884 "rpc call to node failed (lvlist)")
2886 nimg.volumes = lvdata
2887 nimg.lvm_fail = False
2889 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2890 """Verifies and updates the node instance list.
2892 If the listing was successful, then updates this node's instance
2893 list. Otherwise, it marks the RPC call as failed for the instance
2896 @type ninfo: L{objects.Node}
2897 @param ninfo: the node to check
2898 @param nresult: the remote results for the node
2899 @param nimg: the node image object
2902 idata = nresult.get(constants.NV_INSTANCELIST, None)
2903 test = not isinstance(idata, list)
2904 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2905 "rpc call to node failed (instancelist): %s",
2906 utils.SafeEncode(str(idata)))
2908 nimg.hyp_fail = True
2910 nimg.instances = idata
2912 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2913 """Verifies and computes a node information map
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @param nimg: the node image object
2919 @param vg_name: the configured VG name
2923 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2925 # try to read free memory (from the hypervisor)
2926 hv_info = nresult.get(constants.NV_HVINFO, None)
2927 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2928 _ErrorIf(test, constants.CV_ENODEHV, node,
2929 "rpc call to node failed (hvinfo)")
2932 nimg.mfree = int(hv_info["memory_free"])
2933 except (ValueError, TypeError):
2934 _ErrorIf(True, constants.CV_ENODERPC, node,
2935 "node returned invalid nodeinfo, check hypervisor")
2937 # FIXME: devise a free space model for file based instances as well
2938 if vg_name is not None:
2939 test = (constants.NV_VGLIST not in nresult or
2940 vg_name not in nresult[constants.NV_VGLIST])
2941 _ErrorIf(test, constants.CV_ENODELVM, node,
2942 "node didn't return data for the volume group '%s'"
2943 " - it is either missing or broken", vg_name)
2946 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2947 except (ValueError, TypeError):
2948 _ErrorIf(True, constants.CV_ENODERPC, node,
2949 "node returned invalid LVM info, check LVM status")
2951 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2952 """Gets per-disk status information for all instances.
2954 @type nodelist: list of strings
2955 @param nodelist: Node names
2956 @type node_image: dict of (name, L{objects.Node})
2957 @param node_image: Node objects
2958 @type instanceinfo: dict of (name, L{objects.Instance})
2959 @param instanceinfo: Instance objects
2960 @rtype: {instance: {node: [(success, payload)]}}
2961 @return: a dictionary of per-instance dictionaries with nodes as
2962 keys and disk information as values; the disk information is a
2963 list of tuples (success, payload)
2966 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2969 node_disks_devonly = {}
2970 diskless_instances = set()
2971 diskless = constants.DT_DISKLESS
2973 for nname in nodelist:
2974 node_instances = list(itertools.chain(node_image[nname].pinst,
2975 node_image[nname].sinst))
2976 diskless_instances.update(inst for inst in node_instances
2977 if instanceinfo[inst].disk_template == diskless)
2978 disks = [(inst, disk)
2979 for inst in node_instances
2980 for disk in instanceinfo[inst].disks]
2983 # No need to collect data
2986 node_disks[nname] = disks
2988 # _AnnotateDiskParams already makes copies of the disks
2990 for (inst, dev) in disks:
2991 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2992 self.cfg.SetDiskID(anno_disk, nname)
2993 devonly.append(anno_disk)
2995 node_disks_devonly[nname] = devonly
2997 assert len(node_disks) == len(node_disks_devonly)
2999 # Collect data from all nodes with disks
3000 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3003 assert len(result) == len(node_disks)
3007 for (nname, nres) in result.items():
3008 disks = node_disks[nname]
3011 # No data from this node
3012 data = len(disks) * [(False, "node offline")]
3015 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3016 "while getting disk information: %s", msg)
3018 # No data from this node
3019 data = len(disks) * [(False, msg)]
3022 for idx, i in enumerate(nres.payload):
3023 if isinstance(i, (tuple, list)) and len(i) == 2:
3026 logging.warning("Invalid result from node %s, entry %d: %s",
3028 data.append((False, "Invalid result from the remote node"))
3030 for ((inst, _), status) in zip(disks, data):
3031 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3033 # Add empty entries for diskless instances.
3034 for inst in diskless_instances:
3035 assert inst not in instdisk
3038 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3039 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3040 compat.all(isinstance(s, (tuple, list)) and
3041 len(s) == 2 for s in statuses)
3042 for inst, nnames in instdisk.items()
3043 for nname, statuses in nnames.items())
3044 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
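# The returned instdisk structure looks schematically like (names invented):
#   {"inst1": {"nodeA": [(True, status0), (True, status1)],
#              "nodeB": [(True, status0), (True, status1)]}}
# with one (success, payload) pair per disk and node, and an empty dict for
# every diskless instance, as checked by the assertions above.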
3049 def _SshNodeSelector(group_uuid, all_nodes):
3050 """Create endless iterators for all potential SSH check hosts.
3053 nodes = [node for node in all_nodes
3054 if (node.group != group_uuid and
3056 keyfunc = operator.attrgetter("group")
3058 return map(itertools.cycle,
3059 [sorted(map(operator.attrgetter("name"), names))
3060 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3064 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3065 """Choose which nodes should talk to which other nodes.
3067 We will make nodes contact all nodes in their group, and one node from
3070 @warning: This algorithm has a known issue if one node group is much
3071 smaller than others (e.g. just one node). In such a case all other
3072 nodes will talk to the single node.
3075 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3076 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3078 return (online_nodes,
3079 dict((name, sorted([i.next() for i in sel]))
3080 for name in online_nodes))
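# A sketch of the result (node names hypothetical): for a group with online
# nodes "n1" and "n2" and one foreign group containing "n3" and "n4", this
# returns something like
#   (["n1", "n2"], {"n1": ["n3"], "n2": ["n4"]})
# so every online node checks its whole group plus one rotating
# representative per foreign group.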
3082 def BuildHooksEnv(self):
3085 Cluster-Verify hooks are run only in the post phase; their failure is
3086 logged in the verify output and causes the verification to fail.
3090 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3093 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3094 for node in self.my_node_info.values())
3098 def BuildHooksNodes(self):
3099 """Build hooks nodes.
3102 return ([], self.my_node_names)
3104 def Exec(self, feedback_fn):
3105 """Verify integrity of the node group, performing various test on nodes.
3108 # This method has too many local variables. pylint: disable=R0914
3109 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3111 if not self.my_node_names:
3113 feedback_fn("* Empty node group, skipping verification")
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3118 verbose = self.op.verbose
3119 self._feedback_fn = feedback_fn
3121 vg_name = self.cfg.GetVGName()
3122 drbd_helper = self.cfg.GetDRBDHelper()
3123 cluster = self.cfg.GetClusterInfo()
3124 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3125 hypervisors = cluster.enabled_hypervisors
3126 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3128 i_non_redundant = [] # Non redundant instances
3129 i_non_a_balanced = [] # Non auto-balanced instances
3130 i_offline = 0 # Count of offline instances
3131 n_offline = 0 # Count of offline nodes
3132 n_drained = 0 # Count of nodes being drained
3133 node_vol_should = {}
3135 # FIXME: verify OS list
3138 filemap = _ComputeAncillaryFiles(cluster, False)
3140 # do local checksums
3141 master_node = self.master_node = self.cfg.GetMasterNode()
3142 master_ip = self.cfg.GetMasterIP()
3144 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3147 if self.cfg.GetUseExternalMipScript():
3148 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3150 node_verify_param = {
3151 constants.NV_FILELIST:
3152 map(vcluster.MakeVirtualPath,
3153 utils.UniqueSequence(filename
3154 for files in filemap
3155 for filename in files)),
3156 constants.NV_NODELIST:
3157 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3158 self.all_node_info.values()),
3159 constants.NV_HYPERVISOR: hypervisors,
3160 constants.NV_HVPARAMS:
3161 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3162 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3163 for node in node_data_list
3164 if not node.offline],
3165 constants.NV_INSTANCELIST: hypervisors,
3166 constants.NV_VERSION: None,
3167 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3168 constants.NV_NODESETUP: None,
3169 constants.NV_TIME: None,
3170 constants.NV_MASTERIP: (master_node, master_ip),
3171 constants.NV_OSLIST: None,
3172 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3173 constants.NV_USERSCRIPTS: user_scripts,
3176 if vg_name is not None:
3177 node_verify_param[constants.NV_VGLIST] = None
3178 node_verify_param[constants.NV_LVLIST] = vg_name
3179 node_verify_param[constants.NV_PVLIST] = [vg_name]
3182 node_verify_param[constants.NV_DRBDLIST] = None
3183 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3185 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3186 # Load file storage paths only from master node
3187 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3190 # FIXME: this needs to be changed per node-group, not cluster-wide
3192 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3193 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3194 bridges.add(default_nicpp[constants.NIC_LINK])
3195 for instance in self.my_inst_info.values():
3196 for nic in instance.nics:
3197 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3198 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3199 bridges.add(full_nic[constants.NIC_LINK])
3202 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3204 # Build our expected cluster state
3205 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3207 vm_capable=node.vm_capable))
3208 for node in node_data_list)
3212 for node in self.all_node_info.values():
3213 path = _SupportsOob(self.cfg, node)
3214 if path and path not in oob_paths:
3215 oob_paths.append(path)
3218 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3220 for instance in self.my_inst_names:
3221 inst_config = self.my_inst_info[instance]
3222 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3225 for nname in inst_config.all_nodes:
3226 if nname not in node_image:
3227 gnode = self.NodeImage(name=nname)
3228 gnode.ghost = (nname not in self.all_node_info)
3229 node_image[nname] = gnode
3231 inst_config.MapLVsByNode(node_vol_should)
3233 pnode = inst_config.primary_node
3234 node_image[pnode].pinst.append(instance)
3236 for snode in inst_config.secondary_nodes:
3237 nimg = node_image[snode]
3238 nimg.sinst.append(instance)
3239 if pnode not in nimg.sbp:
3240 nimg.sbp[pnode] = []
3241 nimg.sbp[pnode].append(instance)
3243 # At this point, we have the in-memory data structures complete,
3244 # except for the runtime information, which we'll gather next
3246 # Due to the way our RPC system works, exact response times cannot be
3247 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3248 # time before and after executing the request, we can at least have a time
3250 nvinfo_starttime = time.time()
3251 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3253 self.cfg.GetClusterName())
3254 nvinfo_endtime = time.time()
3256 if self.extra_lv_nodes and vg_name is not None:
3258 self.rpc.call_node_verify(self.extra_lv_nodes,
3259 {constants.NV_LVLIST: vg_name},
3260 self.cfg.GetClusterName())
3262 extra_lv_nvinfo = {}
3264 all_drbd_map = self.cfg.ComputeDRBDMap()
3266 feedback_fn("* Gathering disk information (%s nodes)" %
3267 len(self.my_node_names))
3268 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3271 feedback_fn("* Verifying configuration file consistency")
3273 # If not all nodes are being checked, we need to make sure the master node
3274 # and a non-checked vm_capable node are in the list.
3275 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3277 vf_nvinfo = all_nvinfo.copy()
3278 vf_node_info = list(self.my_node_info.values())
3279 additional_nodes = []
3280 if master_node not in self.my_node_info:
3281 additional_nodes.append(master_node)
3282 vf_node_info.append(self.all_node_info[master_node])
3283 # Add the first vm_capable node we find which is not included,
3284 # excluding the master node (which we already have)
3285 for node in absent_nodes:
3286 nodeinfo = self.all_node_info[node]
3287 if (nodeinfo.vm_capable and not nodeinfo.offline and
3288 node != master_node):
3289 additional_nodes.append(node)
3290 vf_node_info.append(self.all_node_info[node])
3292 key = constants.NV_FILELIST
3293 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3294 {key: node_verify_param[key]},
3295 self.cfg.GetClusterName()))
3297 vf_nvinfo = all_nvinfo
3298 vf_node_info = self.my_node_info.values()
3300 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3302 feedback_fn("* Verifying node status")
3306 for node_i in node_data_list:
3308 nimg = node_image[node]
3312 feedback_fn("* Skipping offline node %s" % (node,))
3316 if node == master_node:
3318 elif node_i.master_candidate:
3319 ntype = "master candidate"
3320 elif node_i.drained:
3326 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3328 msg = all_nvinfo[node].fail_msg
3329 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3332 nimg.rpc_fail = True
3335 nresult = all_nvinfo[node].payload
3337 nimg.call_ok = self._VerifyNode(node_i, nresult)
3338 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3339 self._VerifyNodeNetwork(node_i, nresult)
3340 self._VerifyNodeUserScripts(node_i, nresult)
3341 self._VerifyOob(node_i, nresult)
3342 self._VerifyFileStoragePaths(node_i, nresult,
3343 node == master_node)
3346 self._VerifyNodeLVM(node_i, nresult, vg_name)
3347 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3350 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3351 self._UpdateNodeInstances(node_i, nresult, nimg)
3352 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3353 self._UpdateNodeOS(node_i, nresult, nimg)
3355 if not nimg.os_fail:
3356 if refos_img is None:
3358 self._VerifyNodeOS(node_i, nimg, refos_img)
3359 self._VerifyNodeBridges(node_i, nresult, bridges)
3361 # Check whether all running instances are primary for the node. (This
3362 # can no longer be done from _VerifyInstance below, since some of the
3363 # wrong instances could be from other node groups.)
3364 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3366 for inst in non_primary_inst:
3367 test = inst in self.all_inst_info
3368 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3369 "instance should not run on node %s", node_i.name)
3370 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3371 "node is running unknown instance %s", inst)
3373 for node, result in extra_lv_nvinfo.items():
3374 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3375 node_image[node], vg_name)
3377 feedback_fn("* Verifying instance status")
3378 for instance in self.my_inst_names:
3380 feedback_fn("* Verifying instance %s" % instance)
3381 inst_config = self.my_inst_info[instance]
3382 self._VerifyInstance(instance, inst_config, node_image,
3384 inst_nodes_offline = []
3386 pnode = inst_config.primary_node
3387 pnode_img = node_image[pnode]
3388 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3389 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3390 " primary node failed", instance)
3392 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3394 constants.CV_EINSTANCEBADNODE, instance,
3395 "instance is marked as running and lives on offline node %s",
3396 inst_config.primary_node)
3398 # If the instance is non-redundant we cannot survive losing its primary
3399 # node, so we are not N+1 compliant.
3400 if inst_config.disk_template not in constants.DTS_MIRRORED:
3401 i_non_redundant.append(instance)
3403 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3404 constants.CV_EINSTANCELAYOUT,
3405 instance, "instance has multiple secondary nodes: %s",
3406 utils.CommaJoin(inst_config.secondary_nodes),
3407 code=self.ETYPE_WARNING)
3409 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3410 pnode = inst_config.primary_node
3411 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3412 instance_groups = {}
3414 for node in instance_nodes:
3415 instance_groups.setdefault(self.all_node_info[node].group,
3419 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3420 # Sort so that we always list the primary node first.
3421 for group, nodes in sorted(instance_groups.items(),
3422 key=lambda (_, nodes): pnode in nodes,
3425 self._ErrorIf(len(instance_groups) > 1,
3426 constants.CV_EINSTANCESPLITGROUPS,
3427 instance, "instance has primary and secondary nodes in"
3428 " different groups: %s", utils.CommaJoin(pretty_list),
3429 code=self.ETYPE_WARNING)
3431 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3432 i_non_a_balanced.append(instance)
3434 for snode in inst_config.secondary_nodes:
3435 s_img = node_image[snode]
3436 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3437 snode, "instance %s, connection to secondary node failed",
3441 inst_nodes_offline.append(snode)
3443 # warn that the instance lives on offline nodes
3444 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3445 "instance has offline secondary node(s) %s",
3446 utils.CommaJoin(inst_nodes_offline))
3447 # ... or ghost/non-vm_capable nodes
3448 for node in inst_config.all_nodes:
3449 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3450 instance, "instance lives on ghost node %s", node)
3451 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3452 instance, "instance lives on non-vm_capable node %s", node)
3454 feedback_fn("* Verifying orphan volumes")
3455 reserved = utils.FieldSet(*cluster.reserved_lvs)
3457 # We will get spurious "unknown volume" warnings if any node of this group
3458 # is secondary for an instance whose primary is in another group. To avoid
3459 # them, we find these instances and add their volumes to node_vol_should.
3460 for inst in self.all_inst_info.values():
3461 for secondary in inst.secondary_nodes:
3462 if (secondary in self.my_node_info
3463 and inst.name not in self.my_inst_info):
3464 inst.MapLVsByNode(node_vol_should)
3467 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3469 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3470 feedback_fn("* Verifying N+1 Memory redundancy")
3471 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3473 feedback_fn("* Other Notes")
3475 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3476 % len(i_non_redundant))
3478 if i_non_a_balanced:
3479 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3480 % len(i_non_a_balanced))
3483 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3486 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3489 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3493 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3494 """Analyze the post-hooks' result
3496 This method analyses the hook result, handles it, and sends some
3497 nicely-formatted feedback back to the user.
3499 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3500 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3501 @param hooks_results: the results of the multi-node hooks rpc call
3502 @param feedback_fn: function used to send feedback back to the caller
3503 @param lu_result: previous Exec result
3504 @return: the new Exec result, based on the previous result
3508 # We only really run POST phase hooks, only for non-empty groups,
3509 # and are only interested in their results
3510 if not self.my_node_names:
3513 elif phase == constants.HOOKS_PHASE_POST:
3514 # Used to change hooks' output to proper indentation
3515 feedback_fn("* Hooks Results")
3516 assert hooks_results, "invalid result from hooks"
3518 for node_name in hooks_results:
3519 res = hooks_results[node_name]
3521 test = msg and not res.offline
3522 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3523 "Communication failure in hooks execution: %s", msg)
3524 if res.offline or msg:
3525 # No need to investigate payload if node is offline or gave
3528 for script, hkr, output in res.payload:
3529 test = hkr == constants.HKR_FAIL
3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531 "Script %s failed, output:", script)
3533 output = self._HOOKS_INDENT_RE.sub(" ", output)
3534 feedback_fn("%s" % output)
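# _HOOKS_INDENT_RE matches the start of every line (re.M), so the
# substitution above prefixes each line of the hook output with indentation;
# e.g. a two-line script output "check failed\nsee syslog" is reported as an
# indented two-line block, keeping the verify report readable.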
3540 class LUClusterVerifyDisks(NoHooksLU):
3541 """Verifies the cluster disks status.
3546 def ExpandNames(self):
3547 self.share_locks = _ShareAll()
3548 self.needed_locks = {
3549 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3552 def Exec(self, feedback_fn):
3553 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3555 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3556 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3557 for group in group_names])
3560 class LUGroupVerifyDisks(NoHooksLU):
3561 """Verifies the status of all disks in a node group.
3566 def ExpandNames(self):
3567 # Raises errors.OpPrereqError on its own if group can't be found
3568 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3570 self.share_locks = _ShareAll()
3571 self.needed_locks = {
3572 locking.LEVEL_INSTANCE: [],
3573 locking.LEVEL_NODEGROUP: [],
3574 locking.LEVEL_NODE: [],
3577 def DeclareLocks(self, level):
3578 if level == locking.LEVEL_INSTANCE:
3579 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3581 # Lock instances optimistically, needs verification once node and group
3582 # locks have been acquired
3583 self.needed_locks[locking.LEVEL_INSTANCE] = \
3584 self.cfg.GetNodeGroupInstances(self.group_uuid)
3586 elif level == locking.LEVEL_NODEGROUP:
3587 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3589 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3590 set([self.group_uuid] +
3591 # Lock all groups used by instances optimistically; this requires
3592 # going via the node before it's locked, requiring verification
3595 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3596 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3598 elif level == locking.LEVEL_NODE:
3599 # This will only lock the nodes in the group to be verified which contain
3601 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3602 self._LockInstancesNodes()
3604 # Lock all nodes in group to be verified
3605 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3606 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3607 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3609 def CheckPrereq(self):
3610 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3611 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3612 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3614 assert self.group_uuid in owned_groups
3616 # Check if locked instances are still correct
3617 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3619 # Get instance information
3620 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3622 # Check if node groups for locked instances are still correct
3623 _CheckInstancesNodeGroups(self.cfg, self.instances,
3624 owned_groups, owned_nodes, self.group_uuid)
3626 def Exec(self, feedback_fn):
3627 """Verify integrity of cluster disks.
3629 @rtype: tuple of three items
3630 @return: a tuple of (dict of node-to-node_error, list of instances
3631 which need activate-disks, dict of instance: (node, volume) for
3636 res_instances = set()
3639 nv_dict = _MapInstanceDisksToNodes(
3640 [inst for inst in self.instances.values()
3641 if inst.admin_state == constants.ADMINST_UP])
3644 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3645 set(self.cfg.GetVmCapableNodeList()))
3647 node_lvs = self.rpc.call_lv_list(nodes, [])
3649 for (node, node_res) in node_lvs.items():
3650 if node_res.offline:
3653 msg = node_res.fail_msg
3655 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3656 res_nodes[node] = msg
3659 for lv_name, (_, _, lv_online) in node_res.payload.items():
3660 inst = nv_dict.pop((node, lv_name), None)
3661 if not (lv_online or inst is None):
3662 res_instances.add(inst)
3664 # any leftover items in nv_dict are missing LVs, let's arrange the data
3666 for key, inst in nv_dict.iteritems():
3667 res_missing.setdefault(inst, []).append(list(key))
3669 return (res_nodes, list(res_instances), res_missing)
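# Illustrative return value (all names and messages hypothetical):
#   ({"node3": "Error while listing LVs"},        # nodes that failed the query
#    ["inst-web"],                                 # instances needing activate-disks
#    {"inst-db": [["node1", "xenvg/disk0"]]})      # missing (node, volume) pairs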
3672 class LUClusterRepairDiskSizes(NoHooksLU):
3673 """Verifies the cluster disks sizes.
3678 def ExpandNames(self):
3679 if self.op.instances:
3680 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3681 self.needed_locks = {
3682 locking.LEVEL_NODE_RES: [],
3683 locking.LEVEL_INSTANCE: self.wanted_names,
3685 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3687 self.wanted_names = None
3688 self.needed_locks = {
3689 locking.LEVEL_NODE_RES: locking.ALL_SET,
3690 locking.LEVEL_INSTANCE: locking.ALL_SET,
3692 self.share_locks = {
3693 locking.LEVEL_NODE_RES: 1,
3694 locking.LEVEL_INSTANCE: 0,
3697 def DeclareLocks(self, level):
3698 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3699 self._LockInstancesNodes(primary_only=True, level=level)
3701 def CheckPrereq(self):
3702 """Check prerequisites.
3704 This only checks the optional instance list against the existing names.
3707 if self.wanted_names is None:
3708 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3710 self.wanted_instances = \
3711 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3713 def _EnsureChildSizes(self, disk):
3714 """Ensure children of the disk have the needed disk size.
3716 This is valid mainly for DRBD8 and fixes an issue where the
3717 children have smaller disk size.
3719 @param disk: an L{ganeti.objects.Disk} object
3722 if disk.dev_type == constants.LD_DRBD8:
3723 assert disk.children, "Empty children for DRBD8?"
3724 fchild = disk.children[0]
3725 mismatch = fchild.size < disk.size
3726 if mismatch:
3727 self.LogInfo("Child disk has size %d, parent %d, fixing",
3728 fchild.size, disk.size)
3729 fchild.size = disk.size
3731 # and we recurse on this child only, not on the metadev
3732 return self._EnsureChildSizes(fchild) or mismatch
3733 else:
3734 return False
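# Rough illustration (not part of the upstream code): for a DRBD8 disk of
# 10240 MiB whose data child LV was recorded at 10236 MiB, the check above
# bumps the child to 10240 and returns True, telling the caller in Exec that
# the instance configuration needs to be written back via cfg.Update.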
3736 def Exec(self, feedback_fn):
3737 """Verify the size of cluster disks.
3740 # TODO: check child disks too
3741 # TODO: check differences in size between primary/secondary nodes
3743 for instance in self.wanted_instances:
3744 pnode = instance.primary_node
3745 if pnode not in per_node_disks:
3746 per_node_disks[pnode] = []
3747 for idx, disk in enumerate(instance.disks):
3748 per_node_disks[pnode].append((instance, idx, disk))
3750 assert not (frozenset(per_node_disks.keys()) -
3751 self.owned_locks(locking.LEVEL_NODE_RES)), \
3752 "Not owning correct locks"
3753 assert not self.owned_locks(locking.LEVEL_NODE)
3755 changed = []
3756 for node, dskl in per_node_disks.items():
3757 newl = [v[2].Copy() for v in dskl]
3758 for dsk in newl:
3759 self.cfg.SetDiskID(dsk, node)
3760 result = self.rpc.call_blockdev_getsize(node, newl)
3761 if result.fail_msg:
3762 self.LogWarning("Failure in blockdev_getsize call to node"
3763 " %s, ignoring", node)
3764 continue
3765 if len(result.payload) != len(dskl):
3766 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3767 " result.payload=%s", node, len(dskl), result.payload)
3768 self.LogWarning("Invalid result from node %s, ignoring node results",
3769 node)
3770 continue
3771 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3772 if size is None:
3773 self.LogWarning("Disk %d of instance %s did not return size"
3774 " information, ignoring", idx, instance.name)
3775 continue
3776 if not isinstance(size, (int, long)):
3777 self.LogWarning("Disk %d of instance %s did not return valid"
3778 " size information, ignoring", idx, instance.name)
3779 continue
3781 if size != disk.size:
3782 self.LogInfo("Disk %d of instance %s has mismatched size,"
3783 " correcting: recorded %d, actual %d", idx,
3784 instance.name, disk.size, size)
3785 disk.size = size
3786 self.cfg.Update(instance, feedback_fn)
3787 changed.append((instance.name, idx, size))
3788 if self._EnsureChildSizes(disk):
3789 self.cfg.Update(instance, feedback_fn)
3790 changed.append((instance.name, idx, disk.size))
3792 return changed
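# Hypothetical shape of the value returned by Exec (instance name invented
# for illustration): a list of (instance name, disk index, new size) tuples,
# e.g. [("instance1.example.com", 0, 10240)], empty when nothing was fixed.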
3794 class LUClusterRename(LogicalUnit):
3795 """Rename the cluster.
3798 HPATH = "cluster-rename"
3799 HTYPE = constants.HTYPE_CLUSTER
3801 def BuildHooksEnv(self):
3806 "OP_TARGET": self.cfg.GetClusterName(),
3807 "NEW_NAME": self.op.name,
3810 def BuildHooksNodes(self):
3811 """Build hooks nodes.
3814 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3816 def CheckPrereq(self):
3817 """Verify that the passed name is a valid one.
3820 hostname = netutils.GetHostname(name=self.op.name,
3821 family=self.cfg.GetPrimaryIPFamily())
3823 new_name = hostname.name
3824 self.ip = new_ip = hostname.ip
3825 old_name = self.cfg.GetClusterName()
3826 old_ip = self.cfg.GetMasterIP()
3827 if new_name == old_name and new_ip == old_ip:
3828 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3829 " cluster has changed",
3831 if new_ip != old_ip:
3832 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3833 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3834 " reachable on the network" %
3835 new_ip, errors.ECODE_NOTUNIQUE)
3837 self.op.name = new_name
3839 def Exec(self, feedback_fn):
3840 """Rename the cluster.
3843 clustername = self.op.name
3844 new_ip = self.ip
3846 # shutdown the master IP
3847 master_params = self.cfg.GetMasterNetworkParameters()
3848 ems = self.cfg.GetUseExternalMipScript()
3849 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3850 master_params, ems)
3851 result.Raise("Could not disable the master role")
3854 cluster = self.cfg.GetClusterInfo()
3855 cluster.cluster_name = clustername
3856 cluster.master_ip = new_ip
3857 self.cfg.Update(cluster, feedback_fn)
3859 # update the known hosts file
3860 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3861 node_list = self.cfg.GetOnlineNodeList()
3863 node_list.remove(master_params.name)
3866 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3868 master_params.ip = new_ip
3869 result = self.rpc.call_node_activate_master_ip(master_params.name,
3871 msg = result.fail_msg
3873 self.LogWarning("Could not re-enable the master role on"
3874 " the master, please restart manually: %s", msg)
3879 def _ValidateNetmask(cfg, netmask):
3880 """Checks if a netmask is valid.
3882 @type cfg: L{config.ConfigWriter}
3883 @param cfg: The cluster configuration
3885 @param netmask: the netmask to be verified
3886 @raise errors.OpPrereqError: if the validation fails
3889 ip_family = cfg.GetPrimaryIPFamily()
3890 try:
3891 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3892 except errors.ProgrammerError:
3893 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3894 ip_family, errors.ECODE_INVAL)
3895 if not ipcls.ValidateNetmask(netmask):
3896 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3897 (netmask), errors.ECODE_INVAL)
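# Minimal usage sketch (assumed, not taken from the module): on an IPv4
# cluster the netmask is a prefix length, so
#
#   _ValidateNetmask(self.cfg, 24)   # passes silently
#   _ValidateNetmask(self.cfg, 33)   # raises errors.OpPrereqError
#
# while an IPv6 cluster accepts prefix lengths up to 128 instead.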
3900 class LUClusterSetParams(LogicalUnit):
3901 """Change the parameters of the cluster.
3904 HPATH = "cluster-modify"
3905 HTYPE = constants.HTYPE_CLUSTER
3908 def CheckArguments(self):
3912 if self.op.uid_pool:
3913 uidpool.CheckUidPool(self.op.uid_pool)
3915 if self.op.add_uids:
3916 uidpool.CheckUidPool(self.op.add_uids)
3918 if self.op.remove_uids:
3919 uidpool.CheckUidPool(self.op.remove_uids)
3921 if self.op.master_netmask is not None:
3922 _ValidateNetmask(self.cfg, self.op.master_netmask)
3924 if self.op.diskparams:
3925 for dt_params in self.op.diskparams.values():
3926 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3927 try:
3928 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3929 except errors.OpPrereqError, err:
3930 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3931 errors.ECODE_INVAL)
3933 def ExpandNames(self):
3934 # FIXME: in the future maybe other cluster params won't require checking on
3935 # all nodes to be modified.
3936 self.needed_locks = {
3937 locking.LEVEL_NODE: locking.ALL_SET,
3938 locking.LEVEL_INSTANCE: locking.ALL_SET,
3939 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3941 self.share_locks = {
3942 locking.LEVEL_NODE: 1,
3943 locking.LEVEL_INSTANCE: 1,
3944 locking.LEVEL_NODEGROUP: 1,
3947 def BuildHooksEnv(self):
3952 "OP_TARGET": self.cfg.GetClusterName(),
3953 "NEW_VG_NAME": self.op.vg_name,
3956 def BuildHooksNodes(self):
3957 """Build hooks nodes.
3960 mn = self.cfg.GetMasterNode()
3963 def CheckPrereq(self):
3964 """Check prerequisites.
3966 This checks whether the given params don't conflict and
3967 if the given volume group is valid.
3970 if self.op.vg_name is not None and not self.op.vg_name:
3971 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3972 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3973 " instances exist", errors.ECODE_INVAL)
3975 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3976 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3977 raise errors.OpPrereqError("Cannot disable drbd helper while"
3978 " drbd-based instances exist",
3981 node_list = self.owned_locks(locking.LEVEL_NODE)
3983 # if vg_name is not None, check the given volume group on all nodes
3984 if self.op.vg_name:
3985 vglist = self.rpc.call_vg_list(node_list)
3986 for node in node_list:
3987 msg = vglist[node].fail_msg
3988 if msg:
3989 # ignoring down node
3990 self.LogWarning("Error while gathering data on node %s"
3991 " (ignoring node): %s", node, msg)
3992 continue
3993 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3994 self.op.vg_name,
3995 constants.MIN_VG_SIZE)
3996 if vgstatus:
3997 raise errors.OpPrereqError("Error on node '%s': %s" %
3998 (node, vgstatus), errors.ECODE_ENVIRON)
4000 if self.op.drbd_helper:
4001 # checks given drbd helper on all nodes
4002 helpers = self.rpc.call_drbd_helper(node_list)
4003 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4005 self.LogInfo("Not checking drbd helper on offline node %s", node)
4007 msg = helpers[node].fail_msg
4009 raise errors.OpPrereqError("Error checking drbd helper on node"
4010 " '%s': %s" % (node, msg),
4011 errors.ECODE_ENVIRON)
4012 node_helper = helpers[node].payload
4013 if node_helper != self.op.drbd_helper:
4014 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4015 (node, node_helper), errors.ECODE_ENVIRON)
4017 self.cluster = cluster = self.cfg.GetClusterInfo()
4018 # validate params changes
4019 if self.op.beparams:
4020 objects.UpgradeBeParams(self.op.beparams)
4021 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4022 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4024 if self.op.ndparams:
4025 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4026 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4028 # TODO: we need a more general way to handle resetting
4029 # cluster-level parameters to default values
4030 if self.new_ndparams["oob_program"] == "":
4031 self.new_ndparams["oob_program"] = \
4032 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4034 if self.op.hv_state:
4035 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4036 self.cluster.hv_state_static)
4037 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4038 for hv, values in new_hv_state.items())
4040 if self.op.disk_state:
4041 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4042 self.cluster.disk_state_static)
4043 self.new_disk_state = \
4044 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4045 for name, values in svalues.items()))
4046 for storage, svalues in new_disk_state.items())
4049 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4052 all_instances = self.cfg.GetAllInstancesInfo().values()
4053 violations = set()
4054 for group in self.cfg.GetAllNodeGroupsInfo().values():
4055 instances = frozenset([inst for inst in all_instances
4056 if compat.any(node in group.members
4057 for node in inst.all_nodes)])
4058 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4059 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4060 new = _ComputeNewInstanceViolations(ipol,
4061 new_ipolicy, instances)
4063 violations.update(new)
4065 if violations:
4066 self.LogWarning("After the ipolicy change the following instances"
4067 " violate them: %s",
4068 utils.CommaJoin(utils.NiceSort(violations)))
4070 if self.op.nicparams:
4071 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4072 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4073 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4076 # check all instances for consistency
4077 for instance in self.cfg.GetAllInstancesInfo().values():
4078 for nic_idx, nic in enumerate(instance.nics):
4079 params_copy = copy.deepcopy(nic.nicparams)
4080 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4082 # check parameter syntax
4084 objects.NIC.CheckParameterSyntax(params_filled)
4085 except errors.ConfigurationError, err:
4086 nic_errors.append("Instance %s, nic/%d: %s" %
4087 (instance.name, nic_idx, err))
4089 # if we're moving instances to routed, check that they have an ip
4090 target_mode = params_filled[constants.NIC_MODE]
4091 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4092 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4093 " address" % (instance.name, nic_idx))
4095 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4096 "\n".join(nic_errors), errors.ECODE_INVAL)
4098 # hypervisor list/parameters
4099 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4100 if self.op.hvparams:
4101 for hv_name, hv_dict in self.op.hvparams.items():
4102 if hv_name not in self.new_hvparams:
4103 self.new_hvparams[hv_name] = hv_dict
4104 else:
4105 self.new_hvparams[hv_name].update(hv_dict)
4107 # disk template parameters
4108 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4109 if self.op.diskparams:
4110 for dt_name, dt_params in self.op.diskparams.items():
4111 if dt_name not in self.new_diskparams:
4112 self.new_diskparams[dt_name] = dt_params
4113 else:
4114 self.new_diskparams[dt_name].update(dt_params)
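# Merge semantics sketch (hypothetical values, not from the original code):
# with cluster diskparams {"drbd": {"metavg": "xenvg"}} and an opcode passing
# {"drbd": {"resync-rate": 61440}}, new_diskparams keeps both keys for "drbd";
# per-template dicts are updated in place rather than replaced wholesale.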
4116 # os hypervisor parameters
4117 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4118 if self.op.os_hvp:
4119 for os_name, hvs in self.op.os_hvp.items():
4120 if os_name not in self.new_os_hvp:
4121 self.new_os_hvp[os_name] = hvs
4122 else:
4123 for hv_name, hv_dict in hvs.items():
4124 if hv_name not in self.new_os_hvp[os_name]:
4125 self.new_os_hvp[os_name][hv_name] = hv_dict
4126 else:
4127 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4130 self.new_osp = objects.FillDict(cluster.osparams, {})
4131 if self.op.osparams:
4132 for os_name, osp in self.op.osparams.items():
4133 if os_name not in self.new_osp:
4134 self.new_osp[os_name] = {}
4136 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4139 if not self.new_osp[os_name]:
4140 # we removed all parameters
4141 del self.new_osp[os_name]
4143 # check the parameter validity (remote check)
4144 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4145 os_name, self.new_osp[os_name])
4147 # changes to the hypervisor list
4148 if self.op.enabled_hypervisors is not None:
4149 self.hv_list = self.op.enabled_hypervisors
4150 for hv in self.hv_list:
4151 # if the hypervisor doesn't already exist in the cluster
4152 # hvparams, we initialize it to empty, and then (in both
4153 # cases) we make sure to fill the defaults, as we might not
4154 # have a complete defaults list if the hypervisor wasn't
4155 # enabled before
4156 if hv not in new_hvp:
4157 new_hvp[hv] = {}
4158 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4159 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4160 else:
4161 self.hv_list = cluster.enabled_hypervisors
4163 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4164 # either the enabled list has changed, or the parameters have, validate
4165 for hv_name, hv_params in self.new_hvparams.items():
4166 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4167 (self.op.enabled_hypervisors and
4168 hv_name in self.op.enabled_hypervisors)):
4169 # either this is a new hypervisor, or its parameters have changed
4170 hv_class = hypervisor.GetHypervisor(hv_name)
4171 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4172 hv_class.CheckParameterSyntax(hv_params)
4173 _CheckHVParams(self, node_list, hv_name, hv_params)
4176 # no need to check any newly-enabled hypervisors, since the
4177 # defaults have already been checked in the above code-block
4178 for os_name, os_hvp in self.new_os_hvp.items():
4179 for hv_name, hv_params in os_hvp.items():
4180 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4181 # we need to fill in the new os_hvp on top of the actual hv_p
4182 cluster_defaults = self.new_hvparams.get(hv_name, {})
4183 new_osp = objects.FillDict(cluster_defaults, hv_params)
4184 hv_class = hypervisor.GetHypervisor(hv_name)
4185 hv_class.CheckParameterSyntax(new_osp)
4186 _CheckHVParams(self, node_list, hv_name, new_osp)
4188 if self.op.default_iallocator:
4189 alloc_script = utils.FindFile(self.op.default_iallocator,
4190 constants.IALLOCATOR_SEARCH_PATH,
4192 if alloc_script is None:
4193 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4194 " specified" % self.op.default_iallocator,
4197 def Exec(self, feedback_fn):
4198 """Change the parameters of the cluster.
4201 if self.op.vg_name is not None:
4202 new_volume = self.op.vg_name
4205 if new_volume != self.cfg.GetVGName():
4206 self.cfg.SetVGName(new_volume)
4208 feedback_fn("Cluster LVM configuration already in desired"
4209 " state, not changing")
4210 if self.op.drbd_helper is not None:
4211 new_helper = self.op.drbd_helper
4214 if new_helper != self.cfg.GetDRBDHelper():
4215 self.cfg.SetDRBDHelper(new_helper)
4217 feedback_fn("Cluster DRBD helper already in desired state,"
4219 if self.op.hvparams:
4220 self.cluster.hvparams = self.new_hvparams
4221 if self.op.os_hvp:
4222 self.cluster.os_hvp = self.new_os_hvp
4223 if self.op.enabled_hypervisors is not None:
4224 self.cluster.hvparams = self.new_hvparams
4225 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4226 if self.op.beparams:
4227 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4228 if self.op.nicparams:
4229 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4230 if self.op.ipolicy:
4231 self.cluster.ipolicy = self.new_ipolicy
4232 if self.op.osparams:
4233 self.cluster.osparams = self.new_osp
4234 if self.op.ndparams:
4235 self.cluster.ndparams = self.new_ndparams
4236 if self.op.diskparams:
4237 self.cluster.diskparams = self.new_diskparams
4238 if self.op.hv_state:
4239 self.cluster.hv_state_static = self.new_hv_state
4240 if self.op.disk_state:
4241 self.cluster.disk_state_static = self.new_disk_state
4243 if self.op.candidate_pool_size is not None:
4244 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4245 # we need to update the pool size here, otherwise the save will fail
4246 _AdjustCandidatePool(self, [])
4248 if self.op.maintain_node_health is not None:
4249 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4250 feedback_fn("Note: CONFD was disabled at build time, node health"
4251 " maintenance is not useful (still enabling it)")
4252 self.cluster.maintain_node_health = self.op.maintain_node_health
4254 if self.op.prealloc_wipe_disks is not None:
4255 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4257 if self.op.add_uids is not None:
4258 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4260 if self.op.remove_uids is not None:
4261 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4263 if self.op.uid_pool is not None:
4264 self.cluster.uid_pool = self.op.uid_pool
4266 if self.op.default_iallocator is not None:
4267 self.cluster.default_iallocator = self.op.default_iallocator
4269 if self.op.reserved_lvs is not None:
4270 self.cluster.reserved_lvs = self.op.reserved_lvs
4272 if self.op.use_external_mip_script is not None:
4273 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4275 def helper_os(aname, mods, desc):
4276 desc += " OS list"
4277 lst = getattr(self.cluster, aname)
4278 for key, val in mods:
4279 if key == constants.DDM_ADD:
4280 if val in lst:
4281 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4282 else:
4283 lst.append(val)
4284 elif key == constants.DDM_REMOVE:
4285 if val in lst:
4286 lst.remove(val)
4287 else:
4288 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4289 else:
4290 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4292 if self.op.hidden_os:
4293 helper_os("hidden_os", self.op.hidden_os, "hidden")
4295 if self.op.blacklisted_os:
4296 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4298 if self.op.master_netdev:
4299 master_params = self.cfg.GetMasterNetworkParameters()
4300 ems = self.cfg.GetUseExternalMipScript()
4301 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4302 self.cluster.master_netdev)
4303 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4305 result.Raise("Could not disable the master ip")
4306 feedback_fn("Changing master_netdev from %s to %s" %
4307 (master_params.netdev, self.op.master_netdev))
4308 self.cluster.master_netdev = self.op.master_netdev
4310 if self.op.master_netmask:
4311 master_params = self.cfg.GetMasterNetworkParameters()
4312 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4313 result = self.rpc.call_node_change_master_netmask(master_params.name,
4314 master_params.netmask,
4315 self.op.master_netmask,
4317 master_params.netdev)
4319 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4322 self.cluster.master_netmask = self.op.master_netmask
4324 self.cfg.Update(self.cluster, feedback_fn)
4326 if self.op.master_netdev:
4327 master_params = self.cfg.GetMasterNetworkParameters()
4328 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4329 self.op.master_netdev)
4330 ems = self.cfg.GetUseExternalMipScript()
4331 result = self.rpc.call_node_activate_master_ip(master_params.name,
4334 self.LogWarning("Could not re-enable the master ip on"
4335 " the master, please restart manually: %s",
4339 def _UploadHelper(lu, nodes, fname):
4340 """Helper for uploading a file and showing warnings.
4343 if os.path.exists(fname):
4344 result = lu.rpc.call_upload_file(nodes, fname)
4345 for to_node, to_result in result.items():
4346 msg = to_result.fail_msg
4347 if msg:
4348 msg = ("Copy of file %s to node %s failed: %s" %
4349 (fname, to_node, msg))
4350 lu.proc.LogWarning(msg)
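# Hedged usage note: callers pass an already filtered node list, e.g.
#
#   _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
#
# Files missing locally are skipped (the os.path.exists check above) and
# per-node copy failures only produce warnings, so distribution never aborts
# the calling LU.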
4353 def _ComputeAncillaryFiles(cluster, redist):
4354 """Compute files external to Ganeti which need to be consistent.
4356 @type redist: boolean
4357 @param redist: Whether to include files which need to be redistributed
4360 # Compute files for all nodes
4362 pathutils.SSH_KNOWN_HOSTS_FILE,
4363 pathutils.CONFD_HMAC_KEY,
4364 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4365 pathutils.SPICE_CERT_FILE,
4366 pathutils.SPICE_CACERT_FILE,
4367 pathutils.RAPI_USERS_FILE,
4371 # we need to ship at least the RAPI certificate
4372 files_all.add(pathutils.RAPI_CERT_FILE)
4374 files_all.update(pathutils.ALL_CERT_FILES)
4375 files_all.update(ssconf.SimpleStore().GetFileList())
4377 if cluster.modify_etc_hosts:
4378 files_all.add(pathutils.ETC_HOSTS)
4380 if cluster.use_external_mip_script:
4381 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4383 # Files which are optional, these must:
4384 # - be present in one other category as well
4385 # - either exist or not exist on all nodes of that category (mc, vm all)
4387 pathutils.RAPI_USERS_FILE,
4390 # Files which should only be on master candidates
4394 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4398 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4399 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4400 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4402 # Files which should only be on VM-capable nodes
4405 for hv_name in cluster.enabled_hypervisors
4406 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4410 for hv_name in cluster.enabled_hypervisors
4411 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413 # Filenames in each category must be unique
4414 all_files_set = files_all | files_mc | files_vm
4415 assert (len(all_files_set) ==
4416 sum(map(len, [files_all, files_mc, files_vm]))), \
4417 "Found file listed in more than one file list"
4419 # Optional files must be present in one other category
4420 assert all_files_set.issuperset(files_opt), \
4421 "Optional file not in a different required list"
4423 # This one file should never ever be re-distributed via RPC
4424 assert not (redist and
4425 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4427 return (files_all, files_opt, files_mc, files_vm)
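# Indicative split of the four sets returned above (the exact contents depend
# on build options and cluster settings, so treat this as a sketch only):
#   files_all - distributed everywhere, e.g. known_hosts and the HMAC key
#   files_opt - may legitimately be missing, e.g. the RAPI users file
#   files_mc  - master candidates only, e.g. the cluster configuration file
#   files_vm  - VM-capable nodes only, i.e. hypervisor ancillary files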
4430 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4431 """Distribute additional files which are part of the cluster configuration.
4433 ConfigWriter takes care of distributing the config and ssconf files, but
4434 there are more files which should be distributed to all nodes. This function
4435 makes sure those are copied.
4437 @param lu: calling logical unit
4438 @param additional_nodes: list of nodes not in the config to distribute to
4439 @type additional_vm: boolean
4440 @param additional_vm: whether the additional nodes are vm-capable or not
4443 # Gather target nodes
4444 cluster = lu.cfg.GetClusterInfo()
4445 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4447 online_nodes = lu.cfg.GetOnlineNodeList()
4448 online_set = frozenset(online_nodes)
4449 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4451 if additional_nodes is not None:
4452 online_nodes.extend(additional_nodes)
4454 vm_nodes.extend(additional_nodes)
4456 # Never distribute to master node
4457 for nodelist in [online_nodes, vm_nodes]:
4458 if master_info.name in nodelist:
4459 nodelist.remove(master_info.name)
4462 (files_all, _, files_mc, files_vm) = \
4463 _ComputeAncillaryFiles(cluster, True)
4465 # Never re-distribute configuration file from here
4466 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4467 pathutils.CLUSTER_CONF_FILE in files_vm)
4468 assert not files_mc, "Master candidates not handled in this function"
4471 (online_nodes, files_all),
4472 (vm_nodes, files_vm),
4476 for (node_list, files) in filemap:
4478 _UploadHelper(lu, node_list, fname)
4481 class LUClusterRedistConf(NoHooksLU):
4482 """Force the redistribution of cluster configuration.
4484 This is a very simple LU.
4489 def ExpandNames(self):
4490 self.needed_locks = {
4491 locking.LEVEL_NODE: locking.ALL_SET,
4493 self.share_locks[locking.LEVEL_NODE] = 1
4495 def Exec(self, feedback_fn):
4496 """Redistribute the configuration.
4499 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4500 _RedistributeAncillaryFiles(self)
4503 class LUClusterActivateMasterIp(NoHooksLU):
4504 """Activate the master IP on the master node.
4507 def Exec(self, feedback_fn):
4508 """Activate the master IP.
4511 master_params = self.cfg.GetMasterNetworkParameters()
4512 ems = self.cfg.GetUseExternalMipScript()
4513 result = self.rpc.call_node_activate_master_ip(master_params.name,
4515 result.Raise("Could not activate the master IP")
4518 class LUClusterDeactivateMasterIp(NoHooksLU):
4519 """Deactivate the master IP on the master node.
4522 def Exec(self, feedback_fn):
4523 """Deactivate the master IP.
4526 master_params = self.cfg.GetMasterNetworkParameters()
4527 ems = self.cfg.GetUseExternalMipScript()
4528 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4530 result.Raise("Could not deactivate the master IP")
4533 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4534 """Sleep and poll for an instance's disk to sync.
4537 if not instance.disks or disks is not None and not disks:
4538 return True
4540 disks = _ExpandCheckDisks(instance, disks)
4542 if not oneshot:
4543 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4545 node = instance.primary_node
4547 for dev in disks:
4548 lu.cfg.SetDiskID(dev, node)
4550 # TODO: Convert to utils.Retry
4553 degr_retries = 10 # in seconds, as we sleep 1 second each time
4557 cumul_degraded = False
4558 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4559 msg = rstats.fail_msg
4561 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4564 raise errors.RemoteError("Can't contact node %s for mirror data,"
4565 " aborting." % node)
4568 rstats = rstats.payload
4570 for i, mstat in enumerate(rstats):
4572 lu.LogWarning("Can't compute data for node %s/%s",
4573 node, disks[i].iv_name)
4576 cumul_degraded = (cumul_degraded or
4577 (mstat.is_degraded and mstat.sync_percent is None))
4578 if mstat.sync_percent is not None:
4580 if mstat.estimated_time is not None:
4581 rem_time = ("%s remaining (estimated)" %
4582 utils.FormatSeconds(mstat.estimated_time))
4583 max_time = mstat.estimated_time
4585 rem_time = "no time estimate"
4586 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4587 (disks[i].iv_name, mstat.sync_percent, rem_time))
4589 # if we're done but degraded, let's do a few small retries, to
4590 # make sure we see a stable and not transient situation; therefore
4591 # we force restart of the loop
4592 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4593 logging.info("Degraded disks found, %d retries left", degr_retries)
4594 degr_retries -= 1
4595 time.sleep(1)
4596 continue
4598 if done or oneshot:
4599 break
4601 time.sleep(min(60, max_time))
4604 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4605 return not cumul_degraded
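# Illustrative call pattern (an assumption based on how sibling LUs in this
# module use the helper):
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# i.e. a False return value means at least one mirror is still degraded.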
4608 def _BlockdevFind(lu, node, dev, instance):
4609 """Wrapper around call_blockdev_find to annotate diskparams.
4611 @param lu: A reference to the lu object
4612 @param node: The node to call out to
4613 @param dev: The device to find
4614 @param instance: The instance object the device belongs to
4615 @returns The result of the rpc call
4618 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4619 return lu.rpc.call_blockdev_find(node, disk)
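# Usage sketch (assumed): this stands in for a bare
# lu.rpc.call_blockdev_find(node, dev) whenever dev may still carry
# unfilled disk parameters, e.g.
#
#   result = _BlockdevFind(self, node, dev, instance)
#   result.Raise("Can't find device %s on node %s" % (dev.iv_name, node))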
4622 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4623 """Wrapper around L{_CheckDiskConsistencyInner}.
4626 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4627 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4631 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4633 """Check that mirrors are not degraded.
4635 @attention: The device has to be annotated already.
4637 The ldisk parameter, if True, will change the test from the
4638 is_degraded attribute (which represents overall non-ok status for
4639 the device(s)) to the ldisk (representing the local storage status).
4642 lu.cfg.SetDiskID(dev, node)
4644 result = True
4646 if on_primary or dev.AssembleOnSecondary():
4647 rstats = lu.rpc.call_blockdev_find(node, dev)
4648 msg = rstats.fail_msg
4649 if msg:
4650 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4651 result = False
4652 elif not rstats.payload:
4653 lu.LogWarning("Can't find disk on node %s", node)
4654 result = False
4655 else:
4656 if ldisk:
4657 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4658 else:
4659 result = result and not rstats.payload.is_degraded
4661 if dev.children:
4662 for child in dev.children:
4663 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4664 on_primary)
4666 return result
4669 class LUOobCommand(NoHooksLU):
4670 """Logical unit for OOB handling.
4674 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4676 def ExpandNames(self):
4677 """Gather locks we need.
4680 if self.op.node_names:
4681 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4682 lock_names = self.op.node_names
4684 lock_names = locking.ALL_SET
4686 self.needed_locks = {
4687 locking.LEVEL_NODE: lock_names,
4690 def CheckPrereq(self):
4691 """Check prerequisites.
4694 - the node exists in the configuration
4697 Any errors are signaled by raising errors.OpPrereqError.
4699 """
4700 self.nodes = []
4701 self.master_node = self.cfg.GetMasterNode()
4703 assert self.op.power_delay >= 0.0
4705 if self.op.node_names:
4706 if (self.op.command in self._SKIP_MASTER and
4707 self.master_node in self.op.node_names):
4708 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4709 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4711 if master_oob_handler:
4712 additional_text = ("run '%s %s %s' if you want to operate on the"
4713 " master regardless") % (master_oob_handler,
4717 additional_text = "it does not support out-of-band operations"
4719 raise errors.OpPrereqError(("Operating on the master node %s is not"
4720 " allowed for %s; %s") %
4721 (self.master_node, self.op.command,
4722 additional_text), errors.ECODE_INVAL)
4724 self.op.node_names = self.cfg.GetNodeList()
4725 if self.op.command in self._SKIP_MASTER:
4726 self.op.node_names.remove(self.master_node)
4728 if self.op.command in self._SKIP_MASTER:
4729 assert self.master_node not in self.op.node_names
4731 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4733 raise errors.OpPrereqError("Node %s not found" % node_name,
4736 self.nodes.append(node)
4738 if (not self.op.ignore_status and
4739 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4740 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4741 " not marked offline") % node_name,
4744 def Exec(self, feedback_fn):
4745 """Execute OOB and return result if we expect any.
4748 master_node = self.master_node
4749 ret = []
4751 for idx, node in enumerate(utils.NiceSort(self.nodes,
4752 key=lambda node: node.name)):
4753 node_entry = [(constants.RS_NORMAL, node.name)]
4754 ret.append(node_entry)
4756 oob_program = _SupportsOob(self.cfg, node)
4759 node_entry.append((constants.RS_UNAVAIL, None))
4762 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4763 self.op.command, oob_program, node.name)
4764 result = self.rpc.call_run_oob(master_node, oob_program,
4765 self.op.command, node.name,
4769 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4770 node.name, result.fail_msg)
4771 node_entry.append((constants.RS_NODATA, None))
4774 self._CheckPayload(result)
4775 except errors.OpExecError, err:
4776 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4778 node_entry.append((constants.RS_NODATA, None))
4780 if self.op.command == constants.OOB_HEALTH:
4781 # For health we should log important events
4782 for item, status in result.payload:
4783 if status in [constants.OOB_STATUS_WARNING,
4784 constants.OOB_STATUS_CRITICAL]:
4785 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4786 item, node.name, status)
4788 if self.op.command == constants.OOB_POWER_ON:
4789 node.powered = True
4790 elif self.op.command == constants.OOB_POWER_OFF:
4791 node.powered = False
4792 elif self.op.command == constants.OOB_POWER_STATUS:
4793 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4794 if powered != node.powered:
4795 logging.warning(("Recorded power state (%s) of node '%s' does not"
4796 " match actual power state (%s)"), node.powered,
4799 # For configuration changing commands we should update the node
4800 if self.op.command in (constants.OOB_POWER_ON,
4801 constants.OOB_POWER_OFF):
4802 self.cfg.Update(node, feedback_fn)
4804 node_entry.append((constants.RS_NORMAL, result.payload))
4806 if (self.op.command == constants.OOB_POWER_ON and
4807 idx < len(self.nodes) - 1):
4808 time.sleep(self.op.power_delay)
4810 return ret
4812 def _CheckPayload(self, result):
4813 """Checks if the payload is valid.
4815 @param result: RPC result
4816 @raises errors.OpExecError: If payload is not valid
4820 if self.op.command == constants.OOB_HEALTH:
4821 if not isinstance(result.payload, list):
4822 errs.append("command 'health' is expected to return a list but got %s" %
4823 type(result.payload))
4825 for item, status in result.payload:
4826 if status not in constants.OOB_STATUSES:
4827 errs.append("health item '%s' has invalid status '%s'" %
4830 if self.op.command == constants.OOB_POWER_STATUS:
4831 if not isinstance(result.payload, dict):
4832 errs.append("power-status is expected to return a dict but got %s" %
4833 type(result.payload))
4835 if self.op.command in [
4836 constants.OOB_POWER_ON,
4837 constants.OOB_POWER_OFF,
4838 constants.OOB_POWER_CYCLE,
4840 if result.payload is not None:
4841 errs.append("%s is expected to not return payload but got '%s'" %
4842 (self.op.command, result.payload))
4845 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4846 utils.CommaJoin(errs))
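# Summary of the payload shapes enforced above, for illustration only
# (derived from these checks, not from an external specification):
#   OOB_HEALTH             -> list of (item, status) pairs, with status in
#                             constants.OOB_STATUSES
#   OOB_POWER_STATUS       -> dict containing the "powered" boolean
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)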
4849 class _OsQuery(_QueryBase):
4850 FIELDS = query.OS_FIELDS
4852 def ExpandNames(self, lu):
4853 # Lock all nodes in shared mode
4854 # Temporary removal of locks, should be reverted later
4855 # TODO: reintroduce locks when they are lighter-weight
4856 lu.needed_locks = {}
4857 #self.share_locks[locking.LEVEL_NODE] = 1
4858 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4860 # The following variables interact with _QueryBase._GetNames
4862 self.wanted = self.names
4864 self.wanted = locking.ALL_SET
4866 self.do_locking = self.use_locking
4868 def DeclareLocks(self, lu, level):
4872 def _DiagnoseByOS(rlist):
4873 """Remaps a per-node return list into an a per-os per-node dictionary
4875 @param rlist: a map with node names as keys and OS objects as values
4878 @return: a dictionary with osnames as keys and as value another
4879 map, with nodes as keys and tuples of (path, status, diagnose,
4880 variants, parameters, api_versions) as values, eg::
4882 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4883 (/srv/..., False, "invalid api")],
4884 "node2": [(/srv/..., True, "", [], [])]}
4889 # we build here the list of nodes that didn't fail the RPC (at RPC
4890 # level), so that nodes with a non-responding node daemon don't
4891 # make all OSes invalid
4892 good_nodes = [node_name for node_name in rlist
4893 if not rlist[node_name].fail_msg]
4894 for node_name, nr in rlist.items():
4895 if nr.fail_msg or not nr.payload:
4897 for (name, path, status, diagnose, variants,
4898 params, api_versions) in nr.payload:
4899 if name not in all_os:
4900 # build a list of nodes for this os containing empty lists
4901 # for each node in node_list
4903 for nname in good_nodes:
4904 all_os[name][nname] = []
4905 # convert params from [name, help] to (name, help)
4906 params = [tuple(v) for v in params]
4907 all_os[name][node_name].append((path, status, diagnose,
4908 variants, params, api_versions))
4910 return all_os
4911 def _GetQueryData(self, lu):
4912 """Computes the list of nodes and their attributes.
4915 # Locking is not used
4916 assert not (compat.any(lu.glm.is_owned(level)
4917 for level in locking.LEVELS
4918 if level != locking.LEVEL_CLUSTER) or
4919 self.do_locking or self.use_locking)
4921 valid_nodes = [node.name
4922 for node in lu.cfg.GetAllNodesInfo().values()
4923 if not node.offline and node.vm_capable]
4924 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4925 cluster = lu.cfg.GetClusterInfo()
4929 for (os_name, os_data) in pol.items():
4930 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4931 hidden=(os_name in cluster.hidden_os),
4932 blacklisted=(os_name in cluster.blacklisted_os))
4936 api_versions = set()
4938 for idx, osl in enumerate(os_data.values()):
4939 info.valid = bool(info.valid and osl and osl[0][1])
4943 (node_variants, node_params, node_api) = osl[0][3:6]
4946 variants.update(node_variants)
4947 parameters.update(node_params)
4948 api_versions.update(node_api)
4950 # Filter out inconsistent values
4951 variants.intersection_update(node_variants)
4952 parameters.intersection_update(node_params)
4953 api_versions.intersection_update(node_api)
4955 info.variants = list(variants)
4956 info.parameters = list(parameters)
4957 info.api_versions = list(api_versions)
4959 data[os_name] = info
4961 # Prepare data in requested order
4962 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4966 class LUOsDiagnose(NoHooksLU):
4967 """Logical unit for OS diagnose/query.
4973 def _BuildFilter(fields, names):
4974 """Builds a filter for querying OSes.
4977 name_filter = qlang.MakeSimpleFilter("name", names)
4979 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4980 # respective field is not requested
4981 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4982 for fname in ["hidden", "blacklisted"]
4983 if fname not in fields]
4984 if "valid" not in fields:
4985 status_filter.append([qlang.OP_TRUE, "valid"])
4987 if status_filter:
4988 status_filter.insert(0, qlang.OP_AND)
4989 else:
4990 status_filter = None
4992 if name_filter and status_filter:
4993 return [qlang.OP_AND, name_filter, status_filter]
4994 elif name_filter:
4995 return name_filter
4996 else:
4997 return status_filter
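# Example of the resulting filter structure (illustrative): for
# fields=["name"] and names=["debian-8"] the return value is roughly
#
#   [qlang.OP_AND,
#    <name filter built by qlang.MakeSimpleFilter("name", ["debian-8"])>,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
#
# so hidden, blacklisted and invalid OSes stay out of the default listing.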
4999 def CheckArguments(self):
5000 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5001 self.op.output_fields, False)
5003 def ExpandNames(self):
5004 self.oq.ExpandNames(self)
5006 def Exec(self, feedback_fn):
5007 return self.oq.OldStyleQuery(self)
5010 class LUNodeRemove(LogicalUnit):
5011 """Logical unit for removing a node.
5014 HPATH = "node-remove"
5015 HTYPE = constants.HTYPE_NODE
5017 def BuildHooksEnv(self):
5022 "OP_TARGET": self.op.node_name,
5023 "NODE_NAME": self.op.node_name,
5026 def BuildHooksNodes(self):
5027 """Build hooks nodes.
5029 This doesn't run on the target node in the pre phase as a failed
5030 node would then be impossible to remove.
5033 all_nodes = self.cfg.GetNodeList()
5035 all_nodes.remove(self.op.node_name)
5038 return (all_nodes, all_nodes)
5040 def CheckPrereq(self):
5041 """Check prerequisites.
5044 - the node exists in the configuration
5045 - it does not have primary or secondary instances
5046 - it's not the master
5048 Any errors are signaled by raising errors.OpPrereqError.
5051 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5052 node = self.cfg.GetNodeInfo(self.op.node_name)
5053 assert node is not None
5055 masternode = self.cfg.GetMasterNode()
5056 if node.name == masternode:
5057 raise errors.OpPrereqError("Node is the master node, failover to another"
5058 " node is required", errors.ECODE_INVAL)
5060 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5061 if node.name in instance.all_nodes:
5062 raise errors.OpPrereqError("Instance %s is still running on the node,"
5063 " please remove first" % instance_name,
5065 self.op.node_name = node.name
5068 def Exec(self, feedback_fn):
5069 """Removes the node from the cluster.
5073 logging.info("Stopping the node daemon and removing configs from node %s",
5076 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5078 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5081 # Promote nodes to master candidate as needed
5082 _AdjustCandidatePool(self, exceptions=[node.name])
5083 self.context.RemoveNode(node.name)
5085 # Run post hooks on the node before it's removed
5086 _RunPostHook(self, node.name)
5088 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5089 msg = result.fail_msg
5091 self.LogWarning("Errors encountered on the remote node while leaving"
5092 " the cluster: %s", msg)
5094 # Remove node from our /etc/hosts
5095 if self.cfg.GetClusterInfo().modify_etc_hosts:
5096 master_node = self.cfg.GetMasterNode()
5097 result = self.rpc.call_etc_hosts_modify(master_node,
5098 constants.ETC_HOSTS_REMOVE,
5100 result.Raise("Can't update hosts file with new host data")
5101 _RedistributeAncillaryFiles(self)
5104 class _NodeQuery(_QueryBase):
5105 FIELDS = query.NODE_FIELDS
5107 def ExpandNames(self, lu):
5108 lu.needed_locks = {}
5109 lu.share_locks = _ShareAll()
5112 self.wanted = _GetWantedNodes(lu, self.names)
5114 self.wanted = locking.ALL_SET
5116 self.do_locking = (self.use_locking and
5117 query.NQ_LIVE in self.requested_data)
5120 # If any non-static field is requested we need to lock the nodes
5121 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5123 def DeclareLocks(self, lu, level):
5126 def _GetQueryData(self, lu):
5127 """Computes the list of nodes and their attributes.
5130 all_info = lu.cfg.GetAllNodesInfo()
5132 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5134 # Gather data as requested
5135 if query.NQ_LIVE in self.requested_data:
5136 # filter out non-vm_capable nodes
5137 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5139 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5140 [lu.cfg.GetHypervisorType()])
5141 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5142 for (name, nresult) in node_data.items()
5143 if not nresult.fail_msg and nresult.payload)
5147 if query.NQ_INST in self.requested_data:
5148 node_to_primary = dict([(name, set()) for name in nodenames])
5149 node_to_secondary = dict([(name, set()) for name in nodenames])
5151 inst_data = lu.cfg.GetAllInstancesInfo()
5153 for inst in inst_data.values():
5154 if inst.primary_node in node_to_primary:
5155 node_to_primary[inst.primary_node].add(inst.name)
5156 for secnode in inst.secondary_nodes:
5157 if secnode in node_to_secondary:
5158 node_to_secondary[secnode].add(inst.name)
5160 node_to_primary = None
5161 node_to_secondary = None
5163 if query.NQ_OOB in self.requested_data:
5164 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5165 for name, node in all_info.iteritems())
5169 if query.NQ_GROUP in self.requested_data:
5170 groups = lu.cfg.GetAllNodeGroupsInfo()
5174 return query.NodeQueryData([all_info[name] for name in nodenames],
5175 live_data, lu.cfg.GetMasterNode(),
5176 node_to_primary, node_to_secondary, groups,
5177 oob_support, lu.cfg.GetClusterInfo())
5180 class LUNodeQuery(NoHooksLU):
5181 """Logical unit for querying nodes.
5184 # pylint: disable=W0142
5187 def CheckArguments(self):
5188 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5189 self.op.output_fields, self.op.use_locking)
5191 def ExpandNames(self):
5192 self.nq.ExpandNames(self)
5194 def DeclareLocks(self, level):
5195 self.nq.DeclareLocks(self, level)
5197 def Exec(self, feedback_fn):
5198 return self.nq.OldStyleQuery(self)
5201 class LUNodeQueryvols(NoHooksLU):
5202 """Logical unit for getting volumes on node(s).
5206 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5207 _FIELDS_STATIC = utils.FieldSet("node")
5209 def CheckArguments(self):
5210 _CheckOutputFields(static=self._FIELDS_STATIC,
5211 dynamic=self._FIELDS_DYNAMIC,
5212 selected=self.op.output_fields)
5214 def ExpandNames(self):
5215 self.share_locks = _ShareAll()
5216 self.needed_locks = {}
5218 if not self.op.nodes:
5219 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5221 self.needed_locks[locking.LEVEL_NODE] = \
5222 _GetWantedNodes(self, self.op.nodes)
5224 def Exec(self, feedback_fn):
5225 """Computes the list of nodes and their attributes.
5228 nodenames = self.owned_locks(locking.LEVEL_NODE)
5229 volumes = self.rpc.call_node_volumes(nodenames)
5231 ilist = self.cfg.GetAllInstancesInfo()
5232 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5235 for node in nodenames:
5236 nresult = volumes[node]
5239 msg = nresult.fail_msg
5241 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5244 node_vols = sorted(nresult.payload,
5245 key=operator.itemgetter("dev"))
5247 for vol in node_vols:
5249 for field in self.op.output_fields:
5252 elif field == "phys":
5256 elif field == "name":
5258 elif field == "size":
5259 val = int(float(vol["size"]))
5260 elif field == "instance":
5261 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5263 raise errors.ParameterError(field)
5264 node_output.append(str(val))
5266 output.append(node_output)
5271 class LUNodeQueryStorage(NoHooksLU):
5272 """Logical unit for getting information on storage units on node(s).
5275 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5278 def CheckArguments(self):
5279 _CheckOutputFields(static=self._FIELDS_STATIC,
5280 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5281 selected=self.op.output_fields)
5283 def ExpandNames(self):
5284 self.share_locks = _ShareAll()
5285 self.needed_locks = {}
5288 self.needed_locks[locking.LEVEL_NODE] = \
5289 _GetWantedNodes(self, self.op.nodes)
5291 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5293 def Exec(self, feedback_fn):
5294 """Computes the list of nodes and their attributes.
5297 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5299 # Always get name to sort by
5300 if constants.SF_NAME in self.op.output_fields:
5301 fields = self.op.output_fields[:]
5303 fields = [constants.SF_NAME] + self.op.output_fields
5305 # Never ask for node or type as it's only known to the LU
5306 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5307 while extra in fields:
5308 fields.remove(extra)
5310 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5311 name_idx = field_idx[constants.SF_NAME]
5313 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5314 data = self.rpc.call_storage_list(self.nodes,
5315 self.op.storage_type, st_args,
5316 self.op.name, fields)
5320 for node in utils.NiceSort(self.nodes):
5321 nresult = data[node]
5325 msg = nresult.fail_msg
5327 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5330 rows = dict([(row[name_idx], row) for row in nresult.payload])
5332 for name in utils.NiceSort(rows.keys()):
5337 for field in self.op.output_fields:
5338 if field == constants.SF_NODE:
5340 elif field == constants.SF_TYPE:
5341 val = self.op.storage_type
5342 elif field in field_idx:
5343 val = row[field_idx[field]]
5345 raise errors.ParameterError(field)
5354 class _InstanceQuery(_QueryBase):
5355 FIELDS = query.INSTANCE_FIELDS
5357 def ExpandNames(self, lu):
5358 lu.needed_locks = {}
5359 lu.share_locks = _ShareAll()
5362 self.wanted = _GetWantedInstances(lu, self.names)
5364 self.wanted = locking.ALL_SET
5366 self.do_locking = (self.use_locking and
5367 query.IQ_LIVE in self.requested_data)
5369 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5370 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5371 lu.needed_locks[locking.LEVEL_NODE] = []
5372 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5374 self.do_grouplocks = (self.do_locking and
5375 query.IQ_NODES in self.requested_data)
5377 def DeclareLocks(self, lu, level):
5379 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5380 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5382 # Lock all groups used by instances optimistically; this requires going
5383 # via the node before it's locked, requiring verification later on
5384 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5386 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5387 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5388 elif level == locking.LEVEL_NODE:
5389 lu._LockInstancesNodes() # pylint: disable=W0212
5392 def _CheckGroupLocks(lu):
5393 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5394 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5396 # Check if node groups for locked instances are still correct
5397 for instance_name in owned_instances:
5398 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5400 def _GetQueryData(self, lu):
5401 """Computes the list of instances and their attributes.
5404 if self.do_grouplocks:
5405 self._CheckGroupLocks(lu)
5407 cluster = lu.cfg.GetClusterInfo()
5408 all_info = lu.cfg.GetAllInstancesInfo()
5410 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5412 instance_list = [all_info[name] for name in instance_names]
5413 nodes = frozenset(itertools.chain(*(inst.all_nodes
5414 for inst in instance_list)))
5415 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5418 wrongnode_inst = set()
5420 # Gather data as requested
5421 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5423 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5425 result = node_data[name]
5427 # offline nodes will be in both lists
5428 assert result.fail_msg
5429 offline_nodes.append(name)
5431 bad_nodes.append(name)
5432 elif result.payload:
5433 for inst in result.payload:
5434 if inst in all_info:
5435 if all_info[inst].primary_node == name:
5436 live_data.update(result.payload)
5438 wrongnode_inst.add(inst)
5440 # orphan instance; we don't list it here as we don't
5441 # handle this case yet in the output of instance listing
5442 logging.warning("Orphan instance '%s' found on node %s",
5444 # else no instance is alive
5448 if query.IQ_DISKUSAGE in self.requested_data:
5449 gmi = ganeti.masterd.instance
5450 disk_usage = dict((inst.name,
5451 gmi.ComputeDiskSize(inst.disk_template,
5452 [{constants.IDISK_SIZE: disk.size}
5453 for disk in inst.disks]))
5454 for inst in instance_list)
5458 if query.IQ_CONSOLE in self.requested_data:
5460 for inst in instance_list:
5461 if inst.name in live_data:
5462 # Instance is running
5463 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5465 consinfo[inst.name] = None
5466 assert set(consinfo.keys()) == set(instance_names)
5470 if query.IQ_NODES in self.requested_data:
5471 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5473 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5474 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5475 for uuid in set(map(operator.attrgetter("group"),
5481 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5482 disk_usage, offline_nodes, bad_nodes,
5483 live_data, wrongnode_inst, consinfo,
5487 class LUQuery(NoHooksLU):
5488 """Query for resources/items of a certain kind.
5491 # pylint: disable=W0142
5494 def CheckArguments(self):
5495 qcls = _GetQueryImplementation(self.op.what)
5497 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5499 def ExpandNames(self):
5500 self.impl.ExpandNames(self)
5502 def DeclareLocks(self, level):
5503 self.impl.DeclareLocks(self, level)
5505 def Exec(self, feedback_fn):
5506 return self.impl.NewStyleQuery(self)
5509 class LUQueryFields(NoHooksLU):
5510 """Query for resources/items of a certain kind.
5513 # pylint: disable=W0142
5516 def CheckArguments(self):
5517 self.qcls = _GetQueryImplementation(self.op.what)
5519 def ExpandNames(self):
5520 self.needed_locks = {}
5522 def Exec(self, feedback_fn):
5523 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5526 class LUNodeModifyStorage(NoHooksLU):
5527 """Logical unit for modifying a storage volume on a node.
5532 def CheckArguments(self):
5533 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5535 storage_type = self.op.storage_type
5538 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5540 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5541 " modified" % storage_type,
5544 diff = set(self.op.changes.keys()) - modifiable
5546 raise errors.OpPrereqError("The following fields can not be modified for"
5547 " storage units of type '%s': %r" %
5548 (storage_type, list(diff)),
5551 def ExpandNames(self):
5552 self.needed_locks = {
5553 locking.LEVEL_NODE: self.op.node_name,
5556 def Exec(self, feedback_fn):
5557 """Computes the list of nodes and their attributes.
5560 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5561 result = self.rpc.call_storage_modify(self.op.node_name,
5562 self.op.storage_type, st_args,
5563 self.op.name, self.op.changes)
5564 result.Raise("Failed to modify storage unit '%s' on %s" %
5565 (self.op.name, self.op.node_name))
5568 class LUNodeAdd(LogicalUnit):
5569 """Logical unit for adding node to the cluster.
5573 HTYPE = constants.HTYPE_NODE
5574 _NFLAGS = ["master_capable", "vm_capable"]
5576 def CheckArguments(self):
5577 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5578 # validate/normalize the node name
5579 self.hostname = netutils.GetHostname(name=self.op.node_name,
5580 family=self.primary_ip_family)
5581 self.op.node_name = self.hostname.name
5583 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5584 raise errors.OpPrereqError("Cannot readd the master node",
5587 if self.op.readd and self.op.group:
5588 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5589 " being readded", errors.ECODE_INVAL)
5591 def BuildHooksEnv(self):
5594 This will run on all nodes before, and on all nodes + the new node after.
5598 "OP_TARGET": self.op.node_name,
5599 "NODE_NAME": self.op.node_name,
5600 "NODE_PIP": self.op.primary_ip,
5601 "NODE_SIP": self.op.secondary_ip,
5602 "MASTER_CAPABLE": str(self.op.master_capable),
5603 "VM_CAPABLE": str(self.op.vm_capable),
5606 def BuildHooksNodes(self):
5607 """Build hooks nodes.
5610 # Exclude added node
5611 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5612 post_nodes = pre_nodes + [self.op.node_name, ]
5614 return (pre_nodes, post_nodes)
5616 def CheckPrereq(self):
5617 """Check prerequisites.
5620 - the new node is not already in the config
5622 - its parameters (single/dual homed) matches the cluster
5624 Any errors are signaled by raising errors.OpPrereqError.
5628 hostname = self.hostname
5629 node = hostname.name
5630 primary_ip = self.op.primary_ip = hostname.ip
5631 if self.op.secondary_ip is None:
5632 if self.primary_ip_family == netutils.IP6Address.family:
5633 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5634 " IPv4 address must be given as secondary",
5636 self.op.secondary_ip = primary_ip
5638 secondary_ip = self.op.secondary_ip
5639 if not netutils.IP4Address.IsValid(secondary_ip):
5640 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5641 " address" % secondary_ip, errors.ECODE_INVAL)
5643 node_list = cfg.GetNodeList()
5644 if not self.op.readd and node in node_list:
5645 raise errors.OpPrereqError("Node %s is already in the configuration" %
5646 node, errors.ECODE_EXISTS)
5647 elif self.op.readd and node not in node_list:
5648 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5651 self.changed_primary_ip = False
5653 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5654 if self.op.readd and node == existing_node_name:
5655 if existing_node.secondary_ip != secondary_ip:
5656 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5657 " address configuration as before",
5659 if existing_node.primary_ip != primary_ip:
5660 self.changed_primary_ip = True
5664 if (existing_node.primary_ip == primary_ip or
5665 existing_node.secondary_ip == primary_ip or
5666 existing_node.primary_ip == secondary_ip or
5667 existing_node.secondary_ip == secondary_ip):
5668 raise errors.OpPrereqError("New node ip address(es) conflict with"
5669 " existing node %s" % existing_node.name,
5670 errors.ECODE_NOTUNIQUE)
5672 # After this 'if' block, None is no longer a valid value for the
5673 # _capable op attributes
5675 old_node = self.cfg.GetNodeInfo(node)
5676 assert old_node is not None, "Can't retrieve locked node %s" % node
5677 for attr in self._NFLAGS:
5678 if getattr(self.op, attr) is None:
5679 setattr(self.op, attr, getattr(old_node, attr))
5681 for attr in self._NFLAGS:
5682 if getattr(self.op, attr) is None:
5683 setattr(self.op, attr, True)
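# Worked example of the flag defaulting above (illustrative only): when
# readding a node whose config entry has vm_capable=False, the opcode keeps
# vm_capable=False unless it explicitly overrides it; for a brand-new node
# with no flags given in the opcode, both master_capable and vm_capable
# default to True.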
5685 if self.op.readd and not self.op.vm_capable:
5686 pri, sec = cfg.GetNodeInstances(node)
5688 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5689 " flag set to false, but it already holds"
5690 " instances" % node,
5693 # check that the type of the node (single versus dual homed) is the
5694 # same as for the master
5695 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5696 master_singlehomed = myself.secondary_ip == myself.primary_ip
5697 newbie_singlehomed = secondary_ip == primary_ip
5698 if master_singlehomed != newbie_singlehomed:
5699 if master_singlehomed:
5700 raise errors.OpPrereqError("The master has no secondary ip but the"
5701 " new node has one",
5704 raise errors.OpPrereqError("The master has a secondary ip but the"
5705 " new node doesn't have one",
5708 # checks reachability
5709 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5710 raise errors.OpPrereqError("Node not reachable by ping",
5711 errors.ECODE_ENVIRON)
5713 if not newbie_singlehomed:
5714 # check reachability from my secondary ip to newbie's secondary ip
5715 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5716 source=myself.secondary_ip):
5717 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5718 " based ping to node daemon port",
5719 errors.ECODE_ENVIRON)
5726 if self.op.master_capable:
5727 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5729 self.master_candidate = False
5732 self.new_node = old_node
5734 node_group = cfg.LookupNodeGroup(self.op.group)
5735 self.new_node = objects.Node(name=node,
5736 primary_ip=primary_ip,
5737 secondary_ip=secondary_ip,
5738 master_candidate=self.master_candidate,
5739 offline=False, drained=False,
5742 if self.op.ndparams:
5743 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5745 if self.op.hv_state:
5746 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5748 if self.op.disk_state:
5749 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5751 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5752 # it a property on the base class.
5753 result = rpc.DnsOnlyRunner().call_version([node])[node]
5754 result.Raise("Can't get version information from node %s" % node)
5755 if constants.PROTOCOL_VERSION == result.payload:
5756 logging.info("Communication to node %s fine, sw version %s match",
5757 node, result.payload)
5759 raise errors.OpPrereqError("Version mismatch master version %s,"
5760 " node version %s" %
5761 (constants.PROTOCOL_VERSION, result.payload),
5762 errors.ECODE_ENVIRON)
5764 def Exec(self, feedback_fn):
5765 """Adds the new node to the cluster.
5768 new_node = self.new_node
5769 node = new_node.name
5771 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5774 # We are adding a new node, so we assume it's powered
5775 new_node.powered = True
5777 # for re-adds, reset the offline/drained/master-candidate flags;
5778 # we need to reset here, otherwise offline would prevent RPC calls
5779 # later in the procedure; this also means that if the re-add
5780 # fails, we are left with a non-offlined, broken node
5782 new_node.drained = new_node.offline = False # pylint: disable=W0201
5783 self.LogInfo("Readding a node, the offline/drained flags were reset")
5784 # if we demote the node, we do cleanup later in the procedure
5785 new_node.master_candidate = self.master_candidate
5786 if self.changed_primary_ip:
5787 new_node.primary_ip = self.op.primary_ip
5789 # copy the master/vm_capable flags
5790 for attr in self._NFLAGS:
5791 setattr(new_node, attr, getattr(self.op, attr))
5793 # notify the user about any possible mc promotion
5794 if new_node.master_candidate:
5795 self.LogInfo("Node will be a master candidate")
5797 if self.op.ndparams:
5798 new_node.ndparams = self.op.ndparams
5800 new_node.ndparams = {}
5802 if self.op.hv_state:
5803 new_node.hv_state_static = self.new_hv_state
5805 if self.op.disk_state:
5806 new_node.disk_state_static = self.new_disk_state
5808 # Add node to our /etc/hosts, and add key to known_hosts
5809 if self.cfg.GetClusterInfo().modify_etc_hosts:
5810 master_node = self.cfg.GetMasterNode()
5811 result = self.rpc.call_etc_hosts_modify(master_node,
5812 constants.ETC_HOSTS_ADD,
5815 result.Raise("Can't update hosts file with new host data")
5817 if new_node.secondary_ip != new_node.primary_ip:
5818 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5821 node_verify_list = [self.cfg.GetMasterNode()]
5822 node_verify_param = {
5823 constants.NV_NODELIST: ([node], {}),
5824 # TODO: do a node-net-test as well?
5827 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5828 self.cfg.GetClusterName())
5829 for verifier in node_verify_list:
5830 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5831 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5833 for failed in nl_payload:
5834 feedback_fn("ssh/hostname verification failed"
5835 " (checking from %s): %s" %
5836 (verifier, nl_payload[failed]))
5837 raise errors.OpExecError("ssh/hostname verification failed")
5840 _RedistributeAncillaryFiles(self)
5841 self.context.ReaddNode(new_node)
5842 # make sure we redistribute the config
5843 self.cfg.Update(new_node, feedback_fn)
5844 # and make sure the new node will not have old files around
5845 if not new_node.master_candidate:
5846 result = self.rpc.call_node_demote_from_mc(new_node.name)
5847 msg = result.fail_msg
5849 self.LogWarning("Node failed to demote itself from master"
5850 " candidate status: %s" % msg)
5852 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5853 additional_vm=self.op.vm_capable)
5854 self.context.AddNode(new_node, self.proc.GetECId())
5857 class LUNodeSetParams(LogicalUnit):
5858 """Modifies the parameters of a node.
5860 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5861 to the node role (as _ROLE_*)
5862 @cvar _R2F: a dictionary from node role to tuples of flags
5863 @cvar _FLAGS: a list of attribute names corresponding to the flags
5866 HPATH = "node-modify"
5867 HTYPE = constants.HTYPE_NODE
5869 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5871 (True, False, False): _ROLE_CANDIDATE,
5872 (False, True, False): _ROLE_DRAINED,
5873 (False, False, True): _ROLE_OFFLINE,
5874 (False, False, False): _ROLE_REGULAR,
5876 _R2F = dict((v, k) for k, v in _F2R.items())
5877 _FLAGS = ["master_candidate", "drained", "offline"]
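# Illustrative example (not part of the class): the two mappings above
# translate a node's (master_candidate, drained, offline) flag tuple into a
# single role and back, e.g.
#
#   LUNodeSetParams._F2R[(True, False, False)]    # -> _ROLE_CANDIDATE
#   LUNodeSetParams._F2R[(False, False, False)]   # -> _ROLE_REGULAR
#   LUNodeSetParams._R2F[LUNodeSetParams._ROLE_DRAINED]  # -> (False, True, False)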
5879 def CheckArguments(self):
5880 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5881 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5882 self.op.master_capable, self.op.vm_capable,
5883 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5885 if all_mods.count(None) == len(all_mods):
5886 raise errors.OpPrereqError("Please pass at least one modification",
5888 if all_mods.count(True) > 1:
5889 raise errors.OpPrereqError("Can't set the node into more than one"
5890 " state at the same time",
5893 # Boolean value that tells us whether we might be demoting from MC
5894 self.might_demote = (self.op.master_candidate is False or
5895 self.op.offline is True or
5896 self.op.drained is True or
5897 self.op.master_capable is False)
5899 if self.op.secondary_ip:
5900 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5901 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5902 " address" % self.op.secondary_ip,
5905 self.lock_all = self.op.auto_promote and self.might_demote
5906 self.lock_instances = self.op.secondary_ip is not None
5908 def _InstanceFilter(self, instance):
5909 """Filter for getting affected instances.
5912 return (instance.disk_template in constants.DTS_INT_MIRROR and
5913 self.op.node_name in instance.all_nodes)
5915 def ExpandNames(self):
5917 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5919 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5921 # Since modifying a node can have severe effects on currently running
5922 # operations the resource lock is at least acquired in shared mode
5923 self.needed_locks[locking.LEVEL_NODE_RES] = \
5924 self.needed_locks[locking.LEVEL_NODE]
5926 # Get node resource and instance locks in shared mode; they are not used
5927 # for anything but read-only access
5928 self.share_locks[locking.LEVEL_NODE_RES] = 1
5929 self.share_locks[locking.LEVEL_INSTANCE] = 1
5931 if self.lock_instances:
5932 self.needed_locks[locking.LEVEL_INSTANCE] = \
5933 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5935 def BuildHooksEnv(self):
5938 This runs on the master node.
5942 "OP_TARGET": self.op.node_name,
5943 "MASTER_CANDIDATE": str(self.op.master_candidate),
5944 "OFFLINE": str(self.op.offline),
5945 "DRAINED": str(self.op.drained),
5946 "MASTER_CAPABLE": str(self.op.master_capable),
5947 "VM_CAPABLE": str(self.op.vm_capable),
5950 def BuildHooksNodes(self):
5951 """Build hooks nodes.
5954 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5957 def CheckPrereq(self):
5958 """Check prerequisites.
5960 This only checks the instance list against the existing names.
5963 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5965 if self.lock_instances:
5966 affected_instances = \
5967 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5969 # Verify instance locks
5970 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5971 wanted_instances = frozenset(affected_instances.keys())
5972 if wanted_instances - owned_instances:
5973 raise errors.OpPrereqError("Instances affected by changing node %s's"
5974 " secondary IP address have changed since"
5975 " locks were acquired, wanted '%s', have"
5976 " '%s'; retry the operation" %
5978 utils.CommaJoin(wanted_instances),
5979 utils.CommaJoin(owned_instances)),
5982 affected_instances = None
5984 if (self.op.master_candidate is not None or
5985 self.op.drained is not None or
5986 self.op.offline is not None):
5987 # we can't change the master's node flags
5988 if self.op.node_name == self.cfg.GetMasterNode():
5989 raise errors.OpPrereqError("The master role can be changed"
5990 " only via master-failover",
5993 if self.op.master_candidate and not node.master_capable:
5994 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5995 " it a master candidate" % node.name,
5998 if self.op.vm_capable is False:
5999 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6001 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6002 " the vm_capable flag" % node.name,
6005 if node.master_candidate and self.might_demote and not self.lock_all:
6006 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6007 # check if after removing the current node, we're missing master candidates
6009 (mc_remaining, mc_should, _) = \
6010 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6011 if mc_remaining < mc_should:
6012 raise errors.OpPrereqError("Not enough master candidates, please"
6013 " pass auto promote option to allow"
6014 " promotion (--auto-promote or RAPI"
6015 " auto_promote=True)", errors.ECODE_STATE)
6017 self.old_flags = old_flags = (node.master_candidate,
6018 node.drained, node.offline)
6019 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6020 self.old_role = old_role = self._F2R[old_flags]
6022 # Check for ineffective changes
6023 for attr in self._FLAGS:
6024 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6025 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6026 setattr(self.op, attr, None)
6028 # Past this point, any flag change to False means a transition
6029 # away from the respective state, as only real changes are kept
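# Worked example of the "ineffective change" handling above (illustrative
# only): a request with offline=False for a node whose offline flag is
# already False gets self.op.offline reset to None here, so it is not
# interpreted as a role transition further down.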
6031 # TODO: We might query the real power state if it supports OOB
6032 if _SupportsOob(self.cfg, node):
6033 if self.op.offline is False and not (node.powered or
6034 self.op.powered is True):
6035 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6036 " offline status can be reset") %
6037 self.op.node_name, errors.ECODE_STATE)
6038 elif self.op.powered is not None:
6039 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6040 " as it does not support out-of-band"
6041 " handling") % self.op.node_name,
6044 # If we're being deofflined/drained, we'll MC ourself if needed
6045 if (self.op.drained is False or self.op.offline is False or
6046 (self.op.master_capable and not node.master_capable)):
6047 if _DecideSelfPromotion(self):
6048 self.op.master_candidate = True
6049 self.LogInfo("Auto-promoting node to master candidate")
6051 # If we're no longer master capable, we'll demote ourselves from MC
6052 if self.op.master_capable is False and node.master_candidate:
6053 self.LogInfo("Demoting from master candidate")
6054 self.op.master_candidate = False
6057 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6058 if self.op.master_candidate:
6059 new_role = self._ROLE_CANDIDATE
6060 elif self.op.drained:
6061 new_role = self._ROLE_DRAINED
6062 elif self.op.offline:
6063 new_role = self._ROLE_OFFLINE
6064 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6065 # False is still in new flags, which means we're un-setting (the
6067 new_role = self._ROLE_REGULAR
6068 else: # no new flags, nothing, keep old role
6071 self.new_role = new_role
6073 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6074 # Trying to transition out of offline status
6075 result = self.rpc.call_version([node.name])[node.name]
6077 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6078 " to report its version: %s" %
6079 (node.name, result.fail_msg),
6082 self.LogWarning("Transitioning node from offline to online state"
6083 " without using re-add. Please make sure the node"
6086 # When changing the secondary ip, verify if this is a single-homed to
6087 # multi-homed transition or vice versa, and apply the relevant
6089 if self.op.secondary_ip:
6090 # Ok even without locking, because this can't be changed by any LU
6091 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092 master_singlehomed = master.secondary_ip == master.primary_ip
6093 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6094 if self.op.force and node.name == master.name:
6095 self.LogWarning("Transitioning from single-homed to multi-homed"
6096 " cluster. All nodes will require a secondary ip.")
6098 raise errors.OpPrereqError("Changing the secondary ip on a"
6099 " single-homed cluster requires the"
6100 " --force option to be passed, and the"
6101 " target node to be the master",
6103 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6104 if self.op.force and node.name == master.name:
6105 self.LogWarning("Transitioning from multi-homed to single-homed"
6106 " cluster. Secondary IPs will have to be removed.")
6108 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6109 " same as the primary IP on a multi-homed"
6110 " cluster, unless the --force option is"
6111 " passed, and the target node is the"
6112 " master", errors.ECODE_INVAL)
6114 assert not (frozenset(affected_instances) -
6115 self.owned_locks(locking.LEVEL_INSTANCE))
6118 if affected_instances:
6119 msg = ("Cannot change secondary IP address: offline node has"
6120 " instances (%s) configured to use it" %
6121 utils.CommaJoin(affected_instances.keys()))
6122 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6124 # On online nodes, check that no instances are running, and that
6125 # the node has the new ip and we can reach it.
6126 for instance in affected_instances.values():
6127 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6128 msg="cannot change secondary ip")
6130 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6131 if master.name != node.name:
6132 # check reachability from master secondary ip to new secondary ip
6133 if not netutils.TcpPing(self.op.secondary_ip,
6134 constants.DEFAULT_NODED_PORT,
6135 source=master.secondary_ip):
6136 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6137 " based ping to node daemon port",
6138 errors.ECODE_ENVIRON)
6140 if self.op.ndparams:
6141 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6142 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6143 self.new_ndparams = new_ndparams
6145 if self.op.hv_state:
6146 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6147 self.node.hv_state_static)
6149 if self.op.disk_state:
6150 self.new_disk_state = \
6151 _MergeAndVerifyDiskState(self.op.disk_state,
6152 self.node.disk_state_static)
6154 def Exec(self, feedback_fn):
6159 old_role = self.old_role
6160 new_role = self.new_role
6164 if self.op.ndparams:
6165 node.ndparams = self.new_ndparams
6167 if self.op.powered is not None:
6168 node.powered = self.op.powered
6170 if self.op.hv_state:
6171 node.hv_state_static = self.new_hv_state
6173 if self.op.disk_state:
6174 node.disk_state_static = self.new_disk_state
6176 for attr in ["master_capable", "vm_capable"]:
6177 val = getattr(self.op, attr)
6179 setattr(node, attr, val)
6180 result.append((attr, str(val)))
6182 if new_role != old_role:
6183 # Tell the node to demote itself, if no longer MC and not offline
6184 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6185 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6187 self.LogWarning("Node failed to demote itself: %s", msg)
6189 new_flags = self._R2F[new_role]
6190 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6192 result.append((desc, str(nf)))
6193 (node.master_candidate, node.drained, node.offline) = new_flags
6195 # we locked all nodes, we adjust the CP before updating this node
6197 _AdjustCandidatePool(self, [node.name])
6199 if self.op.secondary_ip:
6200 node.secondary_ip = self.op.secondary_ip
6201 result.append(("secondary_ip", self.op.secondary_ip))
6203 # this will trigger configuration file update, if needed
6204 self.cfg.Update(node, feedback_fn)
6206 # this will trigger job queue propagation or cleanup if the mc
6208 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6209 self.context.ReaddNode(node)
6214 class LUNodePowercycle(NoHooksLU):
6215 """Powercycles a node.
6220 def CheckArguments(self):
6221 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6222 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6223 raise errors.OpPrereqError("The node is the master and the force"
6224 " parameter was not set",
6227 def ExpandNames(self):
6228 """Locking for PowercycleNode.
6230 This is a last-resort option and shouldn't block on other
6231 jobs. Therefore, we grab no locks.
6234 self.needed_locks = {}
6236 def Exec(self, feedback_fn):
6240 result = self.rpc.call_node_powercycle(self.op.node_name,
6241 self.cfg.GetHypervisorType())
6242 result.Raise("Failed to schedule the reboot")
6243 return result.payload
6246 class LUClusterQuery(NoHooksLU):
6247 """Query cluster configuration.
6252 def ExpandNames(self):
6253 self.needed_locks = {}
6255 def Exec(self, feedback_fn):
6256 """Return cluster config.
6259 cluster = self.cfg.GetClusterInfo()
6262 # Filter just for enabled hypervisors
6263 for os_name, hv_dict in cluster.os_hvp.items():
6264 os_hvp[os_name] = {}
6265 for hv_name, hv_params in hv_dict.items():
6266 if hv_name in cluster.enabled_hypervisors:
6267 os_hvp[os_name][hv_name] = hv_params
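# Illustrative example (the hypervisor names are only an assumption): if
# cluster.os_hvp has entries for both "kvm" and "xen-pvm" but only "kvm" is
# in enabled_hypervisors, the filtered os_hvp built above contains only the
# "kvm" sub-dictionaries.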
6269 # Convert ip_family to ip_version
6270 primary_ip_version = constants.IP4_VERSION
6271 if cluster.primary_ip_family == netutils.IP6Address.family:
6272 primary_ip_version = constants.IP6_VERSION
6275 "software_version": constants.RELEASE_VERSION,
6276 "protocol_version": constants.PROTOCOL_VERSION,
6277 "config_version": constants.CONFIG_VERSION,
6278 "os_api_version": max(constants.OS_API_VERSIONS),
6279 "export_version": constants.EXPORT_VERSION,
6280 "architecture": runtime.GetArchInfo(),
6281 "name": cluster.cluster_name,
6282 "master": cluster.master_node,
6283 "default_hypervisor": cluster.primary_hypervisor,
6284 "enabled_hypervisors": cluster.enabled_hypervisors,
6285 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6286 for hypervisor_name in cluster.enabled_hypervisors]),
6288 "beparams": cluster.beparams,
6289 "osparams": cluster.osparams,
6290 "ipolicy": cluster.ipolicy,
6291 "nicparams": cluster.nicparams,
6292 "ndparams": cluster.ndparams,
6293 "diskparams": cluster.diskparams,
6294 "candidate_pool_size": cluster.candidate_pool_size,
6295 "master_netdev": cluster.master_netdev,
6296 "master_netmask": cluster.master_netmask,
6297 "use_external_mip_script": cluster.use_external_mip_script,
6298 "volume_group_name": cluster.volume_group_name,
6299 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6300 "file_storage_dir": cluster.file_storage_dir,
6301 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6302 "maintain_node_health": cluster.maintain_node_health,
6303 "ctime": cluster.ctime,
6304 "mtime": cluster.mtime,
6305 "uuid": cluster.uuid,
6306 "tags": list(cluster.GetTags()),
6307 "uid_pool": cluster.uid_pool,
6308 "default_iallocator": cluster.default_iallocator,
6309 "reserved_lvs": cluster.reserved_lvs,
6310 "primary_ip_version": primary_ip_version,
6311 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6312 "hidden_os": cluster.hidden_os,
6313 "blacklisted_os": cluster.blacklisted_os,
6319 class LUClusterConfigQuery(NoHooksLU):
6320 """Return configuration values.
6325 def CheckArguments(self):
6326 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6328 def ExpandNames(self):
6329 self.cq.ExpandNames(self)
6331 def DeclareLocks(self, level):
6332 self.cq.DeclareLocks(self, level)
6334 def Exec(self, feedback_fn):
6335 result = self.cq.OldStyleQuery(self)
6337 assert len(result) == 1
6342 class _ClusterQuery(_QueryBase):
6343 FIELDS = query.CLUSTER_FIELDS
6345 #: Do not sort (there is only one item)
6348 def ExpandNames(self, lu):
6349 lu.needed_locks = {}
6351 # The following variables interact with _QueryBase._GetNames
6352 self.wanted = locking.ALL_SET
6353 self.do_locking = self.use_locking
6356 raise errors.OpPrereqError("Can not use locking for cluster queries",
6359 def DeclareLocks(self, lu, level):
6362 def _GetQueryData(self, lu):
6363 """Computes the list of nodes and their attributes.
6366 # Locking is not used
6367 assert not (compat.any(lu.glm.is_owned(level)
6368 for level in locking.LEVELS
6369 if level != locking.LEVEL_CLUSTER) or
6370 self.do_locking or self.use_locking)
6372 if query.CQ_CONFIG in self.requested_data:
6373 cluster = lu.cfg.GetClusterInfo()
6375 cluster = NotImplemented
6377 if query.CQ_QUEUE_DRAINED in self.requested_data:
6378 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6380 drain_flag = NotImplemented
6382 if query.CQ_WATCHER_PAUSE in self.requested_data:
6383 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6385 watcher_pause = NotImplemented
6387 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
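# Illustrative note on the gating above (a sketch of the intended behaviour,
# not authoritative): a query that only needs the queue drain flag sets just
# CQ_QUEUE_DRAINED in requested_data, so "cluster" and "watcher_pause" stay
# NotImplemented and only the drain-file check is performed.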
6390 class LUInstanceActivateDisks(NoHooksLU):
6391 """Bring up an instance's disks.
6396 def ExpandNames(self):
6397 self._ExpandAndLockInstance()
6398 self.needed_locks[locking.LEVEL_NODE] = []
6399 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6401 def DeclareLocks(self, level):
6402 if level == locking.LEVEL_NODE:
6403 self._LockInstancesNodes()
6405 def CheckPrereq(self):
6406 """Check prerequisites.
6408 This checks that the instance is in the cluster.
6411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6412 assert self.instance is not None, \
6413 "Cannot retrieve locked instance %s" % self.op.instance_name
6414 _CheckNodeOnline(self, self.instance.primary_node)
6416 def Exec(self, feedback_fn):
6417 """Activate the disks.
6420 disks_ok, disks_info = \
6421 _AssembleInstanceDisks(self, self.instance,
6422 ignore_size=self.op.ignore_size)
6424 raise errors.OpExecError("Cannot activate block devices")
6426 if self.op.wait_for_sync:
6427 if not _WaitForSync(self, self.instance):
6428 raise errors.OpExecError("Some disks of the instance are degraded!")
6433 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6435 """Prepare the block devices for an instance.
6437 This sets up the block devices on all nodes.
6439 @type lu: L{LogicalUnit}
6440 @param lu: the logical unit on whose behalf we execute
6441 @type instance: L{objects.Instance}
6442 @param instance: the instance for whose disks we assemble
6443 @type disks: list of L{objects.Disk} or None
6444 @param disks: which disks to assemble (or all, if None)
6445 @type ignore_secondaries: boolean
6446 @param ignore_secondaries: if true, errors on secondary nodes
6447 won't result in an error return from the function
6448 @type ignore_size: boolean
6449 @param ignore_size: if true, the current known size of the disk
6450 will not be used during the disk activation, useful for cases
6451 when the size is wrong
6452 @return: False if the operation failed, otherwise a list of
6453 (host, instance_visible_name, node_visible_name)
6454 with the mapping from node devices to instance devices
6459 iname = instance.name
6460 disks = _ExpandCheckDisks(instance, disks)
6462 # With the two-pass mechanism we try to reduce the window of
6463 # opportunity for the race condition of switching DRBD to primary
6464 # before handshaking occurred, but we do not eliminate it
6466 # The proper fix would be to wait (with some limits) until the
6467 # connection has been made and drbd transitions from WFConnection
6468 # into any other network-connected state (Connected, SyncTarget,
6471 # 1st pass, assemble on all nodes in secondary mode
6472 for idx, inst_disk in enumerate(disks):
6473 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6475 node_disk = node_disk.Copy()
6476 node_disk.UnsetSize()
6477 lu.cfg.SetDiskID(node_disk, node)
6478 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6480 msg = result.fail_msg
6482 is_offline_secondary = (node in instance.secondary_nodes and
6484 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6485 " (is_primary=False, pass=1): %s",
6486 inst_disk.iv_name, node, msg)
6487 if not (ignore_secondaries or is_offline_secondary):
6490 # FIXME: race condition on drbd migration to primary
6492 # 2nd pass, do only the primary node
6493 for idx, inst_disk in enumerate(disks):
6496 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6497 if node != instance.primary_node:
6500 node_disk = node_disk.Copy()
6501 node_disk.UnsetSize()
6502 lu.cfg.SetDiskID(node_disk, node)
6503 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6505 msg = result.fail_msg
6507 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6508 " (is_primary=True, pass=2): %s",
6509 inst_disk.iv_name, node, msg)
6512 dev_path = result.payload
6514 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6516 # leave the disks configured for the primary node
6517 # this is a workaround that would be better fixed by
6518 # improving the logical/physical id handling
6520 lu.cfg.SetDiskID(disk, instance.primary_node)
6522 return disks_ok, device_info
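# Minimal usage sketch for _AssembleInstanceDisks (illustrative; it mirrors
# the call in LUInstanceActivateDisks.Exec above, and the LogInfo line is
# only an example of consuming the returned tuples):
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
#                                                 ignore_size=False)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s visible on %s as %s", iv_name, node, dev_path)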
6525 def _StartInstanceDisks(lu, instance, force):
6526 """Start the disks of an instance.
6529 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6530 ignore_secondaries=force)
6532 _ShutdownInstanceDisks(lu, instance)
6533 if force is not None and not force:
6534 lu.proc.LogWarning("", hint="If the message above refers to a"
6536 " you can retry the operation using '--force'.")
6537 raise errors.OpExecError("Disk consistency error")
6540 class LUInstanceDeactivateDisks(NoHooksLU):
6541 """Shutdown an instance's disks.
6546 def ExpandNames(self):
6547 self._ExpandAndLockInstance()
6548 self.needed_locks[locking.LEVEL_NODE] = []
6549 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6551 def DeclareLocks(self, level):
6552 if level == locking.LEVEL_NODE:
6553 self._LockInstancesNodes()
6555 def CheckPrereq(self):
6556 """Check prerequisites.
6558 This checks that the instance is in the cluster.
6561 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6562 assert self.instance is not None, \
6563 "Cannot retrieve locked instance %s" % self.op.instance_name
6565 def Exec(self, feedback_fn):
6566 """Deactivate the disks
6569 instance = self.instance
6571 _ShutdownInstanceDisks(self, instance)
6573 _SafeShutdownInstanceDisks(self, instance)
6576 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6577 """Shutdown block devices of an instance.
6579 This function checks if an instance is running, before calling
6580 _ShutdownInstanceDisks.
6583 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6584 _ShutdownInstanceDisks(lu, instance, disks=disks)
6587 def _ExpandCheckDisks(instance, disks):
6588 """Return the instance disks selected by the disks list
6590 @type disks: list of L{objects.Disk} or None
6591 @param disks: selected disks
6592 @rtype: list of L{objects.Disk}
6593 @return: selected instance disks to act on
6597 return instance.disks
6599 if not set(disks).issubset(instance.disks):
6600 raise errors.ProgrammerError("Can only act on disks belonging to the"
6605 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6606 """Shutdown block devices of an instance.
6608 This does the shutdown on all nodes of the instance.
6610 If ignore_primary is false, errors on the primary node are
6615 disks = _ExpandCheckDisks(instance, disks)
6618 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6619 lu.cfg.SetDiskID(top_disk, node)
6620 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6621 msg = result.fail_msg
6623 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6624 disk.iv_name, node, msg)
6625 if ((node == instance.primary_node and not ignore_primary) or
6626 (node != instance.primary_node and not result.offline)):
6631 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6632 """Checks if a node has enough free memory.
6634 This function checks if a given node has the needed amount of free
6635 memory. In case the node has less memory or we cannot get the
6636 information from the node, this function raises an OpPrereqError
6639 @type lu: C{LogicalUnit}
6640 @param lu: a logical unit from which we get configuration data
6642 @param node: the node to check
6643 @type reason: C{str}
6644 @param reason: string to use in the error message
6645 @type requested: C{int}
6646 @param requested: the amount of memory in MiB to check for
6647 @type hypervisor_name: C{str}
6648 @param hypervisor_name: the hypervisor to ask for memory stats
6650 @return: node current free memory
6651 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6652 we cannot check the node
6655 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6656 nodeinfo[node].Raise("Can't get data from node %s" % node,
6657 prereq=True, ecode=errors.ECODE_ENVIRON)
6658 (_, _, (hv_info, )) = nodeinfo[node].payload
6660 free_mem = hv_info.get("memory_free", None)
6661 if not isinstance(free_mem, int):
6662 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6663 " was '%s'" % (node, free_mem),
6664 errors.ECODE_ENVIRON)
6665 if requested > free_mem:
6666 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6667 " needed %s MiB, available %s MiB" %
6668 (node, reason, requested, free_mem),
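# Usage sketch for _CheckNodeFreeMemory (illustrative; it mirrors the call
# made from LUInstanceStartup.CheckPrereq later in this module):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
#
# On success the helper returns the node's current free memory in MiB.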
6673 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6674 """Checks if nodes have enough free disk space in the all VGs.
6676 This function checks if all given nodes have the needed amount of
6677 free disk. In case any node has less disk or we cannot get the
6678 information from the node, this function raises an OpPrereqError
6681 @type lu: C{LogicalUnit}
6682 @param lu: a logical unit from which we get configuration data
6683 @type nodenames: C{list}
6684 @param nodenames: the list of node names to check
6685 @type req_sizes: C{dict}
6686 @param req_sizes: the hash of vg and corresponding amount of disk in
6688 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6689 or we cannot check the node
6692 for vg, req_size in req_sizes.items():
6693 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
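# Illustrative example of the req_sizes argument (the VG and node names are
# hypothetical): checking for 10 GiB of free space in volume group "xenvg"
# on two nodes would look like
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"], {"xenvg": 10240})
#
# i.e. req_sizes maps each VG name to the required amount of disk in MiB.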
6696 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6697 """Checks if nodes have enough free disk space in the specified VG.
6699 This function checks if all given nodes have the needed amount of
6700 free disk. In case any node has less disk or we cannot get the
6701 information from the node, this function raises an OpPrereqError
6704 @type lu: C{LogicalUnit}
6705 @param lu: a logical unit from which we get configuration data
6706 @type nodenames: C{list}
6707 @param nodenames: the list of node names to check
6709 @param vg: the volume group to check
6710 @type requested: C{int}
6711 @param requested: the amount of disk in MiB to check for
6712 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6713 or we cannot check the node
6716 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6717 for node in nodenames:
6718 info = nodeinfo[node]
6719 info.Raise("Cannot get current information from node %s" % node,
6720 prereq=True, ecode=errors.ECODE_ENVIRON)
6721 (_, (vg_info, ), _) = info.payload
6722 vg_free = vg_info.get("vg_free", None)
6723 if not isinstance(vg_free, int):
6724 raise errors.OpPrereqError("Can't compute free disk space on node"
6725 " %s for vg %s, result was '%s'" %
6726 (node, vg, vg_free), errors.ECODE_ENVIRON)
6727 if requested > vg_free:
6728 raise errors.OpPrereqError("Not enough disk space on target node %s"
6729 " vg %s: required %d MiB, available %d MiB" %
6730 (node, vg, requested, vg_free),
6734 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6735 """Checks if nodes have enough physical CPUs
6737 This function checks if all given nodes have the needed number of
6738 physical CPUs. In case any node has fewer CPUs or we cannot get the
6739 information from the node, this function raises an OpPrereqError
6742 @type lu: C{LogicalUnit}
6743 @param lu: a logical unit from which we get configuration data
6744 @type nodenames: C{list}
6745 @param nodenames: the list of node names to check
6746 @type requested: C{int}
6747 @param requested: the minimum acceptable number of physical CPUs
6748 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6749 or we cannot check the node
6752 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6753 for node in nodenames:
6754 info = nodeinfo[node]
6755 info.Raise("Cannot get current information from node %s" % node,
6756 prereq=True, ecode=errors.ECODE_ENVIRON)
6757 (_, _, (hv_info, )) = info.payload
6758 num_cpus = hv_info.get("cpu_total", None)
6759 if not isinstance(num_cpus, int):
6760 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6761 " on node %s, result was '%s'" %
6762 (node, num_cpus), errors.ECODE_ENVIRON)
6763 if requested > num_cpus:
6764 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6765 "required" % (node, num_cpus, requested),
6769 class LUInstanceStartup(LogicalUnit):
6770 """Starts an instance.
6773 HPATH = "instance-start"
6774 HTYPE = constants.HTYPE_INSTANCE
6777 def CheckArguments(self):
6779 if self.op.beparams:
6780 # fill the beparams dict
6781 objects.UpgradeBeParams(self.op.beparams)
6782 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6784 def ExpandNames(self):
6785 self._ExpandAndLockInstance()
6786 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6788 def DeclareLocks(self, level):
6789 if level == locking.LEVEL_NODE_RES:
6790 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6792 def BuildHooksEnv(self):
6795 This runs on master, primary and secondary nodes of the instance.
6799 "FORCE": self.op.force,
6802 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6806 def BuildHooksNodes(self):
6807 """Build hooks nodes.
6810 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6813 def CheckPrereq(self):
6814 """Check prerequisites.
6816 This checks that the instance is in the cluster.
6819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6820 assert self.instance is not None, \
6821 "Cannot retrieve locked instance %s" % self.op.instance_name
6824 if self.op.hvparams:
6825 # check hypervisor parameter syntax (locally)
6826 cluster = self.cfg.GetClusterInfo()
6827 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6828 filled_hvp = cluster.FillHV(instance)
6829 filled_hvp.update(self.op.hvparams)
6830 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6831 hv_type.CheckParameterSyntax(filled_hvp)
6832 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6834 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6836 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6838 if self.primary_offline and self.op.ignore_offline_nodes:
6839 self.proc.LogWarning("Ignoring offline primary node")
6841 if self.op.hvparams or self.op.beparams:
6842 self.proc.LogWarning("Overridden parameters are ignored")
6844 _CheckNodeOnline(self, instance.primary_node)
6846 bep = self.cfg.GetClusterInfo().FillBE(instance)
6847 bep.update(self.op.beparams)
6849 # check bridges existence
6850 _CheckInstanceBridgesExist(self, instance)
6852 remote_info = self.rpc.call_instance_info(instance.primary_node,
6854 instance.hypervisor)
6855 remote_info.Raise("Error checking node %s" % instance.primary_node,
6856 prereq=True, ecode=errors.ECODE_ENVIRON)
6857 if not remote_info.payload: # not running already
6858 _CheckNodeFreeMemory(self, instance.primary_node,
6859 "starting instance %s" % instance.name,
6860 bep[constants.BE_MINMEM], instance.hypervisor)
6862 def Exec(self, feedback_fn):
6863 """Start the instance.
6866 instance = self.instance
6867 force = self.op.force
6869 if not self.op.no_remember:
6870 self.cfg.MarkInstanceUp(instance.name)
6872 if self.primary_offline:
6873 assert self.op.ignore_offline_nodes
6874 self.proc.LogInfo("Primary node offline, marked instance as started")
6876 node_current = instance.primary_node
6878 _StartInstanceDisks(self, instance, force)
6881 self.rpc.call_instance_start(node_current,
6882 (instance, self.op.hvparams,
6884 self.op.startup_paused)
6885 msg = result.fail_msg
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance: %s" % msg)
6891 class LUInstanceReboot(LogicalUnit):
6892 """Reboot an instance.
6895 HPATH = "instance-reboot"
6896 HTYPE = constants.HTYPE_INSTANCE
6899 def ExpandNames(self):
6900 self._ExpandAndLockInstance()
6902 def BuildHooksEnv(self):
6905 This runs on master, primary and secondary nodes of the instance.
6909 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6910 "REBOOT_TYPE": self.op.reboot_type,
6911 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6918 def BuildHooksNodes(self):
6919 """Build hooks nodes.
6922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6925 def CheckPrereq(self):
6926 """Check prerequisites.
6928 This checks that the instance is in the cluster.
6931 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6932 assert self.instance is not None, \
6933 "Cannot retrieve locked instance %s" % self.op.instance_name
6934 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6935 _CheckNodeOnline(self, instance.primary_node)
6937 # check bridges existence
6938 _CheckInstanceBridgesExist(self, instance)
6940 def Exec(self, feedback_fn):
6941 """Reboot the instance.
6944 instance = self.instance
6945 ignore_secondaries = self.op.ignore_secondaries
6946 reboot_type = self.op.reboot_type
6948 remote_info = self.rpc.call_instance_info(instance.primary_node,
6950 instance.hypervisor)
6951 remote_info.Raise("Error checking node %s" % instance.primary_node)
6952 instance_running = bool(remote_info.payload)
6954 node_current = instance.primary_node
6956 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6957 constants.INSTANCE_REBOOT_HARD]:
6958 for disk in instance.disks:
6959 self.cfg.SetDiskID(disk, node_current)
6960 result = self.rpc.call_instance_reboot(node_current, instance,
6962 self.op.shutdown_timeout)
6963 result.Raise("Could not reboot instance")
6965 if instance_running:
6966 result = self.rpc.call_instance_shutdown(node_current, instance,
6967 self.op.shutdown_timeout)
6968 result.Raise("Could not shutdown instance for full reboot")
6969 _ShutdownInstanceDisks(self, instance)
6971 self.LogInfo("Instance %s was already stopped, starting now",
6973 _StartInstanceDisks(self, instance, ignore_secondaries)
6974 result = self.rpc.call_instance_start(node_current,
6975 (instance, None, None), False)
6976 msg = result.fail_msg
6978 _ShutdownInstanceDisks(self, instance)
6979 raise errors.OpExecError("Could not start instance for"
6980 " full reboot: %s" % msg)
6982 self.cfg.MarkInstanceUp(instance.name)
6985 class LUInstanceShutdown(LogicalUnit):
6986 """Shutdown an instance.
6989 HPATH = "instance-stop"
6990 HTYPE = constants.HTYPE_INSTANCE
6993 def ExpandNames(self):
6994 self._ExpandAndLockInstance()
6996 def BuildHooksEnv(self):
6999 This runs on master, primary and secondary nodes of the instance.
7002 env = _BuildInstanceHookEnvByObject(self, self.instance)
7003 env["TIMEOUT"] = self.op.timeout
7006 def BuildHooksNodes(self):
7007 """Build hooks nodes.
7010 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7013 def CheckPrereq(self):
7014 """Check prerequisites.
7016 This checks that the instance is in the cluster.
7019 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7020 assert self.instance is not None, \
7021 "Cannot retrieve locked instance %s" % self.op.instance_name
7023 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7025 self.primary_offline = \
7026 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7028 if self.primary_offline and self.op.ignore_offline_nodes:
7029 self.proc.LogWarning("Ignoring offline primary node")
7031 _CheckNodeOnline(self, self.instance.primary_node)
7033 def Exec(self, feedback_fn):
7034 """Shutdown the instance.
7037 instance = self.instance
7038 node_current = instance.primary_node
7039 timeout = self.op.timeout
7041 if not self.op.no_remember:
7042 self.cfg.MarkInstanceDown(instance.name)
7044 if self.primary_offline:
7045 assert self.op.ignore_offline_nodes
7046 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7048 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7049 msg = result.fail_msg
7051 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7053 _ShutdownInstanceDisks(self, instance)
7056 class LUInstanceReinstall(LogicalUnit):
7057 """Reinstall an instance.
7060 HPATH = "instance-reinstall"
7061 HTYPE = constants.HTYPE_INSTANCE
7064 def ExpandNames(self):
7065 self._ExpandAndLockInstance()
7067 def BuildHooksEnv(self):
7070 This runs on master, primary and secondary nodes of the instance.
7073 return _BuildInstanceHookEnvByObject(self, self.instance)
7075 def BuildHooksNodes(self):
7076 """Build hooks nodes.
7079 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7082 def CheckPrereq(self):
7083 """Check prerequisites.
7085 This checks that the instance is in the cluster and is not running.
7088 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7089 assert instance is not None, \
7090 "Cannot retrieve locked instance %s" % self.op.instance_name
7091 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7092 " offline, cannot reinstall")
7094 if instance.disk_template == constants.DT_DISKLESS:
7095 raise errors.OpPrereqError("Instance '%s' has no disks" %
7096 self.op.instance_name,
7098 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7100 if self.op.os_type is not None:
7102 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7103 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7104 instance_os = self.op.os_type
7106 instance_os = instance.os
7108 nodelist = list(instance.all_nodes)
7110 if self.op.osparams:
7111 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7112 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7113 self.os_inst = i_osdict # the new dict (without defaults)
7117 self.instance = instance
7119 def Exec(self, feedback_fn):
7120 """Reinstall the instance.
7123 inst = self.instance
7125 if self.op.os_type is not None:
7126 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7127 inst.os = self.op.os_type
7128 # Write to configuration
7129 self.cfg.Update(inst, feedback_fn)
7131 _StartInstanceDisks(self, inst, None)
7133 feedback_fn("Running the instance OS create scripts...")
7134 # FIXME: pass debug option from opcode to backend
7135 result = self.rpc.call_instance_os_add(inst.primary_node,
7136 (inst, self.os_inst), True,
7137 self.op.debug_level)
7138 result.Raise("Could not install OS for instance %s on node %s" %
7139 (inst.name, inst.primary_node))
7141 _ShutdownInstanceDisks(self, inst)
7144 class LUInstanceRecreateDisks(LogicalUnit):
7145 """Recreate an instance's missing disks.
7148 HPATH = "instance-recreate-disks"
7149 HTYPE = constants.HTYPE_INSTANCE
7152 _MODIFYABLE = frozenset([
7153 constants.IDISK_SIZE,
7154 constants.IDISK_MODE,
7157 # New or changed disk parameters may have different semantics
7158 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7159 constants.IDISK_ADOPT,
7161 # TODO: Implement support changing VG while recreating
7163 constants.IDISK_METAVG,
7166 def _RunAllocator(self):
7167 """Run the allocator based on input opcode.
7170 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7173 # The allocator should actually run in "relocate" mode, but current
7174 # allocators don't support relocating all the nodes of an instance at
7175 # the same time. As a workaround we use "allocate" mode, but this is
7176 # suboptimal for two reasons:
7177 # - The instance name passed to the allocator is present in the list of
7178 # existing instances, so there could be a conflict within the
7179 # internal structures of the allocator. This doesn't happen with the
7180 # current allocators, but it's a liability.
7181 # - The allocator counts the resources used by the instance twice: once
7182 # because the instance exists already, and once because it tries to
7183 # allocate a new instance.
7184 # The allocator could choose some of the nodes on which the instance is
7185 # running, but that's not a problem. If the instance nodes are broken,
7186 # they should already be marked as drained or offline, and hence
7187 # skipped by the allocator. If instance disks have been lost for other
7188 # reasons, then recreating the disks on the same nodes should be fine.
7189 disk_template = self.instance.disk_template
7190 spindle_use = be_full[constants.BE_SPINDLE_USE]
7191 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7192 disk_template=disk_template,
7193 tags=list(self.instance.GetTags()),
7194 os=self.instance.os,
7196 vcpus=be_full[constants.BE_VCPUS],
7197 memory=be_full[constants.BE_MAXMEM],
7198 spindle_use=spindle_use,
7199 disks=[{constants.IDISK_SIZE: d.size,
7200 constants.IDISK_MODE: d.mode}
7201 for d in self.instance.disks],
7202 hypervisor=self.instance.hypervisor)
7203 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7205 ial.Run(self.op.iallocator)
7207 assert req.RequiredNodes() == len(self.instance.all_nodes)
7210 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7211 " %s" % (self.op.iallocator, ial.info),
7214 self.op.nodes = ial.result
7215 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7216 self.op.instance_name, self.op.iallocator,
7217 utils.CommaJoin(ial.result))
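# Illustrative note (based on the allocate-mode request built above; node
# names are hypothetical): for a DRBD8 instance the allocator returns a
# two-element list such as ["node1", "node2"], which then replaces
# self.op.nodes and is later checked against the instance's node count.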
7219 def CheckArguments(self):
7220 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7221 # Normalize and convert deprecated list of disk indices
7222 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
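# Worked example of the normalization above (illustrative only): a
# deprecated opcode value of disks=[2, 0] becomes disks=[(0, {}), (2, {})],
# i.e. a sorted list of (disk index, parameter overrides) pairs with empty
# overrides.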
7224 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7226 raise errors.OpPrereqError("Some disks have been specified more than"
7227 " once: %s" % utils.CommaJoin(duplicates),
7230 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7231 # when neither iallocator nor nodes are specified
7232 if self.op.iallocator or self.op.nodes:
7233 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7235 for (idx, params) in self.op.disks:
7236 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7237 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7239 raise errors.OpPrereqError("Parameters for disk %s try to change"
7240 " unmodifyable parameter(s): %s" %
7241 (idx, utils.CommaJoin(unsupported)),
7244 def ExpandNames(self):
7245 self._ExpandAndLockInstance()
7246 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7248 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7249 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7251 self.needed_locks[locking.LEVEL_NODE] = []
7252 if self.op.iallocator:
7253 # iallocator will select a new node in the same group
7254 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7255 self.needed_locks[locking.LEVEL_NODE_RES] = []
7257 def DeclareLocks(self, level):
7258 if level == locking.LEVEL_NODEGROUP:
7259 assert self.op.iallocator is not None
7260 assert not self.op.nodes
7261 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7262 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7263 # Lock the primary group used by the instance optimistically; this
7264 # requires going via the node before it's locked, requiring
7265 # verification later on
7266 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7267 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7269 elif level == locking.LEVEL_NODE:
7270 # If an allocator is used, then we lock all the nodes in the current
7271 # instance group, as we don't know yet which ones will be selected;
7272 # if we replace the nodes without using an allocator, locks are
7273 # already declared in ExpandNames; otherwise, we need to lock all the
7274 # instance nodes for disk re-creation
7275 if self.op.iallocator:
7276 assert not self.op.nodes
7277 assert not self.needed_locks[locking.LEVEL_NODE]
7278 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7280 # Lock member nodes of the group of the primary node
7281 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7282 self.needed_locks[locking.LEVEL_NODE].extend(
7283 self.cfg.GetNodeGroup(group_uuid).members)
7284 elif not self.op.nodes:
7285 self._LockInstancesNodes(primary_only=False)
7286 elif level == locking.LEVEL_NODE_RES:
7288 self.needed_locks[locking.LEVEL_NODE_RES] = \
7289 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7291 def BuildHooksEnv(self):
7294 This runs on master, primary and secondary nodes of the instance.
7297 return _BuildInstanceHookEnvByObject(self, self.instance)
7299 def BuildHooksNodes(self):
7300 """Build hooks nodes.
7303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7306 def CheckPrereq(self):
7307 """Check prerequisites.
7309 This checks that the instance is in the cluster and is not running.
7312 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7313 assert instance is not None, \
7314 "Cannot retrieve locked instance %s" % self.op.instance_name
7316 if len(self.op.nodes) != len(instance.all_nodes):
7317 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7318 " %d replacement nodes were specified" %
7319 (instance.name, len(instance.all_nodes),
7320 len(self.op.nodes)),
7322 assert instance.disk_template != constants.DT_DRBD8 or \
7323 len(self.op.nodes) == 2
7324 assert instance.disk_template != constants.DT_PLAIN or \
7325 len(self.op.nodes) == 1
7326 primary_node = self.op.nodes[0]
7328 primary_node = instance.primary_node
7329 if not self.op.iallocator:
7330 _CheckNodeOnline(self, primary_node)
7332 if instance.disk_template == constants.DT_DISKLESS:
7333 raise errors.OpPrereqError("Instance '%s' has no disks" %
7334 self.op.instance_name, errors.ECODE_INVAL)
7336 # Verify if node group locks are still correct
7337 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7339 # Node group locks are acquired only for the primary node (and only
7340 # when the allocator is used)
7341 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7344 # if we replace nodes *and* the old primary is offline, we don't
7345 # check the instance state
7346 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7347 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7348 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7349 msg="cannot recreate disks")
7352 self.disks = dict(self.op.disks)
7354 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7356 maxidx = max(self.disks.keys())
7357 if maxidx >= len(instance.disks):
7358 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7361 if ((self.op.nodes or self.op.iallocator) and
7362 sorted(self.disks.keys()) != range(len(instance.disks))):
7363 raise errors.OpPrereqError("Can't recreate disks partially and"
7364 " change the nodes at the same time",
7367 self.instance = instance
7369 if self.op.iallocator:
7370 self._RunAllocator()
7371 # Release unneeded node and node resource locks
7372 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7373 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7375 def Exec(self, feedback_fn):
7376 """Recreate the disks.
7379 instance = self.instance
7381 assert (self.owned_locks(locking.LEVEL_NODE) ==
7382 self.owned_locks(locking.LEVEL_NODE_RES))
7385 mods = [] # keeps track of needed changes
7387 for idx, disk in enumerate(instance.disks):
7389 changes = self.disks[idx]
7391 # Disk should not be recreated
7395 # update secondaries for disks, if needed
7396 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7397 # need to update the nodes and minors
7398 assert len(self.op.nodes) == 2
7399 assert len(disk.logical_id) == 6 # otherwise disk internals
7401 (_, _, old_port, _, _, old_secret) = disk.logical_id
7402 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7403 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7404 new_minors[0], new_minors[1], old_secret)
7405 assert len(disk.logical_id) == len(new_id)
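# For reference (labels derived from the unpacking above, descriptive rather
# than authoritative): a DRBD8 logical_id is a 6-tuple roughly of the form
#   (node_a, node_b, port, minor_a, minor_b, shared_secret)
# and only the node names and minors are replaced here; the port and secret
# are carried over unchanged.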
7409 mods.append((idx, new_id, changes))
7411 # now that we have passed all asserts above, we can apply the mods
7412 # in a single run (to avoid partial changes)
7413 for idx, new_id, changes in mods:
7414 disk = instance.disks[idx]
7415 if new_id is not None:
7416 assert disk.dev_type == constants.LD_DRBD8
7417 disk.logical_id = new_id
7419 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7420 mode=changes.get(constants.IDISK_MODE, None))
7422 # change primary node, if needed
7424 instance.primary_node = self.op.nodes[0]
7425 self.LogWarning("Changing the instance's nodes, you will have to"
7426 " remove any disks left on the older nodes manually")
7429 self.cfg.Update(instance, feedback_fn)
7431 # All touched nodes must be locked
7432 mylocks = self.owned_locks(locking.LEVEL_NODE)
7433 assert mylocks.issuperset(frozenset(instance.all_nodes))
7434 _CreateDisks(self, instance, to_skip=to_skip)
7437 class LUInstanceRename(LogicalUnit):
7438 """Rename an instance.
7441 HPATH = "instance-rename"
7442 HTYPE = constants.HTYPE_INSTANCE
7444 def CheckArguments(self):
7448 if self.op.ip_check and not self.op.name_check:
7449 # TODO: make the ip check more flexible and not depend on the name check
7450 raise errors.OpPrereqError("IP address check requires a name check",
7453 def BuildHooksEnv(self):
7456 This runs on master, primary and secondary nodes of the instance.
7459 env = _BuildInstanceHookEnvByObject(self, self.instance)
7460 env["INSTANCE_NEW_NAME"] = self.op.new_name
7463 def BuildHooksNodes(self):
7464 """Build hooks nodes.
7467 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7470 def CheckPrereq(self):
7471 """Check prerequisites.
7473 This checks that the instance is in the cluster and is not running.
7476 self.op.instance_name = _ExpandInstanceName(self.cfg,
7477 self.op.instance_name)
7478 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7479 assert instance is not None
7480 _CheckNodeOnline(self, instance.primary_node)
7481 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7482 msg="cannot rename")
7483 self.instance = instance
7485 new_name = self.op.new_name
7486 if self.op.name_check:
7487 hostname = _CheckHostnameSane(self, new_name)
7488 new_name = self.op.new_name = hostname.name
7489 if (self.op.ip_check and
7490 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7491 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7492 (hostname.ip, new_name),
7493 errors.ECODE_NOTUNIQUE)
7495 instance_list = self.cfg.GetInstanceList()
7496 if new_name in instance_list and new_name != instance.name:
7497 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7498 new_name, errors.ECODE_EXISTS)
7500 def Exec(self, feedback_fn):
7501 """Rename the instance.
7504 inst = self.instance
7505 old_name = inst.name
7507 rename_file_storage = False
7508 if (inst.disk_template in constants.DTS_FILEBASED and
7509 self.op.new_name != inst.name):
7510 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7511 rename_file_storage = True
7513 self.cfg.RenameInstance(inst.name, self.op.new_name)
7514 # Change the instance lock. This is definitely safe while we hold the BGL.
7515 # Otherwise the new lock would have to be added in acquired mode.
7517 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7518 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7520 # re-read the instance from the configuration after rename
7521 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7523 if rename_file_storage:
7524 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7525 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7526 old_file_storage_dir,
7527 new_file_storage_dir)
7528 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7529 " (but the instance has been renamed in Ganeti)" %
7530 (inst.primary_node, old_file_storage_dir,
7531 new_file_storage_dir))
7533 _StartInstanceDisks(self, inst, None)
7534 # update info on disks
7535 info = _GetInstanceInfoText(inst)
7536 for (idx, disk) in enumerate(inst.disks):
7537 for node in inst.all_nodes:
7538 self.cfg.SetDiskID(disk, node)
7539 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7541 self.LogWarning("Error setting info on node %s for disk %s: %s",
7542 node, idx, result.fail_msg)
7544 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7545 old_name, self.op.debug_level)
7546 msg = result.fail_msg
7548 msg = ("Could not run OS rename script for instance %s on node %s"
7549 " (but the instance has been renamed in Ganeti): %s" %
7550 (inst.name, inst.primary_node, msg))
7551 self.proc.LogWarning(msg)
7553 _ShutdownInstanceDisks(self, inst)
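# For illustration: for file-based disk templates the logical_id is a
# (file_driver, path) pair (see _GenerateDiskTemplate below), so the storage
# directory renamed above is simply the dirname of the first disk's path.
# The driver and path values here are hypothetical.
#
#   logical_id = ("loop", "/srv/ganeti/file-storage/inst1/disk0")
#   os.path.dirname(logical_id[1])   # -> "/srv/ganeti/file-storage/inst1"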
7558 class LUInstanceRemove(LogicalUnit):
7559 """Remove an instance.
7562 HPATH = "instance-remove"
7563 HTYPE = constants.HTYPE_INSTANCE
7566 def ExpandNames(self):
7567 self._ExpandAndLockInstance()
7568 self.needed_locks[locking.LEVEL_NODE] = []
7569 self.needed_locks[locking.LEVEL_NODE_RES] = []
7570 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7572 def DeclareLocks(self, level):
7573 if level == locking.LEVEL_NODE:
7574 self._LockInstancesNodes()
7575 elif level == locking.LEVEL_NODE_RES:
7577 self.needed_locks[locking.LEVEL_NODE_RES] = \
7578 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7580 def BuildHooksEnv(self):
7583 This runs on master, primary and secondary nodes of the instance.
7586 env = _BuildInstanceHookEnvByObject(self, self.instance)
7587 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7590 def BuildHooksNodes(self):
7591 """Build hooks nodes.
7594 nl = [self.cfg.GetMasterNode()]
7595 nl_post = list(self.instance.all_nodes) + nl
7596 return (nl, nl_post)
7598 def CheckPrereq(self):
7599 """Check prerequisites.
7601 This checks that the instance is in the cluster.
7604 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7605 assert self.instance is not None, \
7606 "Cannot retrieve locked instance %s" % self.op.instance_name
7608 def Exec(self, feedback_fn):
7609 """Remove the instance.
7612 instance = self.instance
7613 logging.info("Shutting down instance %s on node %s",
7614 instance.name, instance.primary_node)
7616 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7617 self.op.shutdown_timeout)
7618 msg = result.fail_msg
7620 if self.op.ignore_failures:
7621 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7623 raise errors.OpExecError("Could not shutdown instance %s on"
7625 (instance.name, instance.primary_node, msg))
7627 assert (self.owned_locks(locking.LEVEL_NODE) ==
7628 self.owned_locks(locking.LEVEL_NODE_RES))
7629 assert not (set(instance.all_nodes) -
7630 self.owned_locks(locking.LEVEL_NODE)), \
7631 "Not owning correct locks"
7633 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7636 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7637 """Utility function to remove an instance.
7640 logging.info("Removing block devices for instance %s", instance.name)
7642 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7643 if not ignore_failures:
7644 raise errors.OpExecError("Can't remove instance's disks")
7645 feedback_fn("Warning: can't remove instance's disks")
7647 logging.info("Removing instance %s out of cluster config", instance.name)
7649 lu.cfg.RemoveInstance(instance.name)
7651 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7652 "Instance lock removal conflict"
7654 # Remove lock for the instance
7655 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7658 class LUInstanceQuery(NoHooksLU):
7659 """Logical unit for querying instances.
7662 # pylint: disable=W0142
7665 def CheckArguments(self):
7666 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7667 self.op.output_fields, self.op.use_locking)
7669 def ExpandNames(self):
7670 self.iq.ExpandNames(self)
7672 def DeclareLocks(self, level):
7673 self.iq.DeclareLocks(self, level)
7675 def Exec(self, feedback_fn):
7676 return self.iq.OldStyleQuery(self)
7679 class LUInstanceFailover(LogicalUnit):
7680 """Failover an instance.
7683 HPATH = "instance-failover"
7684 HTYPE = constants.HTYPE_INSTANCE
7687 def CheckArguments(self):
7688 """Check the arguments.
7691 self.iallocator = getattr(self.op, "iallocator", None)
7692 self.target_node = getattr(self.op, "target_node", None)
7694 def ExpandNames(self):
7695 self._ExpandAndLockInstance()
7697 if self.op.target_node is not None:
7698 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7700 self.needed_locks[locking.LEVEL_NODE] = []
7701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7703 self.needed_locks[locking.LEVEL_NODE_RES] = []
7704 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7706 ignore_consistency = self.op.ignore_consistency
7707 shutdown_timeout = self.op.shutdown_timeout
7708 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7711 ignore_consistency=ignore_consistency,
7712 shutdown_timeout=shutdown_timeout,
7713 ignore_ipolicy=self.op.ignore_ipolicy)
7714 self.tasklets = [self._migrater]
7716 def DeclareLocks(self, level):
7717 if level == locking.LEVEL_NODE:
7718 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7719 if instance.disk_template in constants.DTS_EXT_MIRROR:
7720 if self.op.target_node is None:
7721 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7723 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7724 self.op.target_node]
7725 del self.recalculate_locks[locking.LEVEL_NODE]
7727 self._LockInstancesNodes()
7728 elif level == locking.LEVEL_NODE_RES:
7730 self.needed_locks[locking.LEVEL_NODE_RES] = \
7731 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733 def BuildHooksEnv(self):
7736 This runs on master, primary and secondary nodes of the instance.
7739 instance = self._migrater.instance
7740 source_node = instance.primary_node
7741 target_node = self.op.target_node
7743 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7744 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7745 "OLD_PRIMARY": source_node,
7746 "NEW_PRIMARY": target_node,
7749 if instance.disk_template in constants.DTS_INT_MIRROR:
7750 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7751 env["NEW_SECONDARY"] = source_node
7753 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7755 env.update(_BuildInstanceHookEnvByObject(self, instance))
7759 def BuildHooksNodes(self):
7760 """Build hooks nodes.
7763 instance = self._migrater.instance
7764 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7765 return (nl, nl + [instance.primary_node])
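# For illustration, the failover-specific hook environment built above for a
# hypothetical DRBD instance failing over from node1 (primary) to node2
# (its secondary); the exact values are examples only:
#
#   {
#     "IGNORE_CONSISTENCY": False,
#     "SHUTDOWN_TIMEOUT": 120,
#     "OLD_PRIMARY": "node1",
#     "NEW_PRIMARY": "node2",
#     "OLD_SECONDARY": "node2",   # the current secondary
#     "NEW_SECONDARY": "node1",   # the old primary becomes the secondary
#   }
#
# For externally mirrored templates both *_SECONDARY values are empty strings.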
7768 class LUInstanceMigrate(LogicalUnit):
7769 """Migrate an instance.
7771 This is migration without shutting down, compared to the failover,
7772 which is done with shutdown.
7775 HPATH = "instance-migrate"
7776 HTYPE = constants.HTYPE_INSTANCE
7779 def ExpandNames(self):
7780 self._ExpandAndLockInstance()
7782 if self.op.target_node is not None:
7783 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7785 self.needed_locks[locking.LEVEL_NODE] = []
7786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 self.needed_locks[locking.LEVEL_NODE] = []
7789 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7792 TLMigrateInstance(self, self.op.instance_name,
7793 cleanup=self.op.cleanup,
7795 fallback=self.op.allow_failover,
7796 allow_runtime_changes=self.op.allow_runtime_changes,
7797 ignore_ipolicy=self.op.ignore_ipolicy)
7798 self.tasklets = [self._migrater]
7800 def DeclareLocks(self, level):
7801 if level == locking.LEVEL_NODE:
7802 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7803 if instance.disk_template in constants.DTS_EXT_MIRROR:
7804 if self.op.target_node is None:
7805 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7807 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7808 self.op.target_node]
7809 del self.recalculate_locks[locking.LEVEL_NODE]
7811 self._LockInstancesNodes()
7812 elif level == locking.LEVEL_NODE_RES:
7814 self.needed_locks[locking.LEVEL_NODE_RES] = \
7815 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7817 def BuildHooksEnv(self):
7820 This runs on master, primary and secondary nodes of the instance.
7823 instance = self._migrater.instance
7824 source_node = instance.primary_node
7825 target_node = self.op.target_node
7826 env = _BuildInstanceHookEnvByObject(self, instance)
7828 "MIGRATE_LIVE": self._migrater.live,
7829 "MIGRATE_CLEANUP": self.op.cleanup,
7830 "OLD_PRIMARY": source_node,
7831 "NEW_PRIMARY": target_node,
7832 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7835 if instance.disk_template in constants.DTS_INT_MIRROR:
7836 env["OLD_SECONDARY"] = target_node
7837 env["NEW_SECONDARY"] = source_node
7839 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7843 def BuildHooksNodes(self):
7844 """Build hooks nodes.
7847 instance = self._migrater.instance
7848 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7849 return (nl, nl + [instance.primary_node])
7852 class LUInstanceMove(LogicalUnit):
7853 """Move an instance by data-copying.
7856 HPATH = "instance-move"
7857 HTYPE = constants.HTYPE_INSTANCE
7860 def ExpandNames(self):
7861 self._ExpandAndLockInstance()
7862 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7863 self.op.target_node = target_node
7864 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7865 self.needed_locks[locking.LEVEL_NODE_RES] = []
7866 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7868 def DeclareLocks(self, level):
7869 if level == locking.LEVEL_NODE:
7870 self._LockInstancesNodes(primary_only=True)
7871 elif level == locking.LEVEL_NODE_RES:
7873 self.needed_locks[locking.LEVEL_NODE_RES] = \
7874 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7876 def BuildHooksEnv(self):
7879 This runs on master, primary and secondary nodes of the instance.
7883 "TARGET_NODE": self.op.target_node,
7884 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7886 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7894 self.cfg.GetMasterNode(),
7895 self.instance.primary_node,
7896 self.op.target_node,
7900 def CheckPrereq(self):
7901 """Check prerequisites.
7903 This checks that the instance is in the cluster.
7906 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7907 assert self.instance is not None, \
7908 "Cannot retrieve locked instance %s" % self.op.instance_name
7910 node = self.cfg.GetNodeInfo(self.op.target_node)
7911 assert node is not None, \
7912 "Cannot retrieve locked node %s" % self.op.target_node
7914 self.target_node = target_node = node.name
7916 if target_node == instance.primary_node:
7917 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7918 (instance.name, target_node),
7921 bep = self.cfg.GetClusterInfo().FillBE(instance)
7923 for idx, dsk in enumerate(instance.disks):
7924 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7925 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7926 " cannot copy" % idx, errors.ECODE_STATE)
7928 _CheckNodeOnline(self, target_node)
7929 _CheckNodeNotDrained(self, target_node)
7930 _CheckNodeVmCapable(self, target_node)
7931 cluster = self.cfg.GetClusterInfo()
7932 group_info = self.cfg.GetNodeGroup(node.group)
7933 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7934 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7935 ignore=self.op.ignore_ipolicy)
7937 if instance.admin_state == constants.ADMINST_UP:
7938 # check memory requirements on the secondary node
7939 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7940 instance.name, bep[constants.BE_MAXMEM],
7941 instance.hypervisor)
7943 self.LogInfo("Not checking memory on the secondary node as"
7944 " instance will not be started")
7946 # check bridge existence
7947 _CheckInstanceBridgesExist(self, instance, node=target_node)
7949 def Exec(self, feedback_fn):
7950 """Move an instance.
7952 The move is done by shutting it down on its present node, copying
7953 the data over (slow) and starting it on the new node.
7956 instance = self.instance
7958 source_node = instance.primary_node
7959 target_node = self.target_node
7961 self.LogInfo("Shutting down instance %s on source node %s",
7962 instance.name, source_node)
7964 assert (self.owned_locks(locking.LEVEL_NODE) ==
7965 self.owned_locks(locking.LEVEL_NODE_RES))
7967 result = self.rpc.call_instance_shutdown(source_node, instance,
7968 self.op.shutdown_timeout)
7969 msg = result.fail_msg
7971 if self.op.ignore_consistency:
7972 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7973 " Proceeding anyway. Please make sure node"
7974 " %s is down. Error details: %s",
7975 instance.name, source_node, source_node, msg)
7977 raise errors.OpExecError("Could not shutdown instance %s on"
7979 (instance.name, source_node, msg))
7981 # create the target disks
7983 _CreateDisks(self, instance, target_node=target_node)
7984 except errors.OpExecError:
7985 self.LogWarning("Device creation failed, reverting...")
7987 _RemoveDisks(self, instance, target_node=target_node)
7989 self.cfg.ReleaseDRBDMinors(instance.name)
7992 cluster_name = self.cfg.GetClusterInfo().cluster_name
7995 # activate, get path, copy the data over
7996 for idx, disk in enumerate(instance.disks):
7997 self.LogInfo("Copying data for disk %d", idx)
7998 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7999 instance.name, True, idx)
8001 self.LogWarning("Can't assemble newly created disk %d: %s",
8002 idx, result.fail_msg)
8003 errs.append(result.fail_msg)
8005 dev_path = result.payload
8006 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8007 target_node, dev_path,
8010 self.LogWarning("Can't copy data over for disk %d: %s",
8011 idx, result.fail_msg)
8012 errs.append(result.fail_msg)
8016 self.LogWarning("Some disks failed to copy, aborting")
8018 _RemoveDisks(self, instance, target_node=target_node)
8020 self.cfg.ReleaseDRBDMinors(instance.name)
8021 raise errors.OpExecError("Errors during disk copy: %s" %
8024 instance.primary_node = target_node
8025 self.cfg.Update(instance, feedback_fn)
8027 self.LogInfo("Removing the disks on the original node")
8028 _RemoveDisks(self, instance, target_node=source_node)
8030 # Only start the instance if it's marked as up
8031 if instance.admin_state == constants.ADMINST_UP:
8032 self.LogInfo("Starting instance %s on node %s",
8033 instance.name, target_node)
8035 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8036 ignore_secondaries=True)
8038 _ShutdownInstanceDisks(self, instance)
8039 raise errors.OpExecError("Can't activate the instance's disks")
8041 result = self.rpc.call_instance_start(target_node,
8042 (instance, None, None), False)
8043 msg = result.fail_msg
8045 _ShutdownInstanceDisks(self, instance)
8046 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8047 (instance.name, target_node, msg))
8050 class LUNodeMigrate(LogicalUnit):
8051 """Migrate all instances from a node.
8054 HPATH = "node-migrate"
8055 HTYPE = constants.HTYPE_NODE
8058 def CheckArguments(self):
8061 def ExpandNames(self):
8062 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8064 self.share_locks = _ShareAll()
8065 self.needed_locks = {
8066 locking.LEVEL_NODE: [self.op.node_name],
8069 def BuildHooksEnv(self):
8072 This runs on the master, the primary and all the secondaries.
8076 "NODE_NAME": self.op.node_name,
8077 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8080 def BuildHooksNodes(self):
8081 """Build hooks nodes.
8084 nl = [self.cfg.GetMasterNode()]
8087 def CheckPrereq(self):
8090 def Exec(self, feedback_fn):
8091 # Prepare jobs for migration instances
8092 allow_runtime_changes = self.op.allow_runtime_changes
8094 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8097 iallocator=self.op.iallocator,
8098 target_node=self.op.target_node,
8099 allow_runtime_changes=allow_runtime_changes,
8100 ignore_ipolicy=self.op.ignore_ipolicy)]
8101 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8104 # TODO: Run iallocator in this opcode and pass correct placement options to
8105 # OpInstanceMigrate. Since other jobs can modify the cluster between
8106 # running the iallocator and the actual migration, a good consistency model
8107 # will have to be found.
8109 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8110 frozenset([self.op.node_name]))
8112 return ResultWithJobs(jobs)
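# For illustration, the job list built above contains one single-opcode job
# per primary instance of the node being migrated (instance names below are
# hypothetical):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
#   return ResultWithJobs(jobs)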
8115 class TLMigrateInstance(Tasklet):
8116 """Tasklet class for instance migration.
8119 @ivar live: whether the migration will be done live or non-live;
8120 this variable is initialized only after CheckPrereq has run
8121 @type cleanup: boolean
8122 @ivar cleanup: Whether we clean up after a failed migration
8123 @type iallocator: string
8124 @ivar iallocator: The iallocator used to determine target_node
8125 @type target_node: string
8126 @ivar target_node: If given, the target_node to reallocate the instance to
8127 @type failover: boolean
8128 @ivar failover: Whether operation results in failover or migration
8129 @type fallback: boolean
8130 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8132 @type ignore_consistency: boolean
8133 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
8135 @type shutdown_timeout: int
8136 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
8137 @type ignore_ipolicy: bool
8138 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8143 _MIGRATION_POLL_INTERVAL = 1 # seconds
8144 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8146 def __init__(self, lu, instance_name, cleanup=False,
8147 failover=False, fallback=False,
8148 ignore_consistency=False,
8149 allow_runtime_changes=True,
8150 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8151 ignore_ipolicy=False):
8152 """Initializes this class.
8155 Tasklet.__init__(self, lu)
8158 self.instance_name = instance_name
8159 self.cleanup = cleanup
8160 self.live = False # will be overridden later
8161 self.failover = failover
8162 self.fallback = fallback
8163 self.ignore_consistency = ignore_consistency
8164 self.shutdown_timeout = shutdown_timeout
8165 self.ignore_ipolicy = ignore_ipolicy
8166 self.allow_runtime_changes = allow_runtime_changes
8168 def CheckPrereq(self):
8169 """Check prerequisites.
8171 This checks that the instance is in the cluster.
8174 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8175 instance = self.cfg.GetInstanceInfo(instance_name)
8176 assert instance is not None
8177 self.instance = instance
8178 cluster = self.cfg.GetClusterInfo()
8180 if (not self.cleanup and
8181 not instance.admin_state == constants.ADMINST_UP and
8182 not self.failover and self.fallback):
8183 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8184 " switching to failover")
8185 self.failover = True
8187 if instance.disk_template not in constants.DTS_MIRRORED:
8192 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8193 " %s" % (instance.disk_template, text),
8196 if instance.disk_template in constants.DTS_EXT_MIRROR:
8197 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8199 if self.lu.op.iallocator:
8200 self._RunAllocator()
8202 # We set self.target_node as it is required by
8204 self.target_node = self.lu.op.target_node
8206 # Check that the target node is correct in terms of instance policy
8207 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8208 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8209 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8211 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8212 ignore=self.ignore_ipolicy)
8214 # self.target_node is already populated, either directly or by the
8216 target_node = self.target_node
8217 if self.target_node == instance.primary_node:
8218 raise errors.OpPrereqError("Cannot migrate instance %s"
8219 " to its primary (%s)" %
8220 (instance.name, instance.primary_node),
8223 if len(self.lu.tasklets) == 1:
8224 # It is safe to release locks only when we're the only tasklet
8226 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8227 keep=[instance.primary_node, self.target_node])
8230 secondary_nodes = instance.secondary_nodes
8231 if not secondary_nodes:
8232 raise errors.ConfigurationError("No secondary node but using"
8233 " %s disk template" %
8234 instance.disk_template)
8235 target_node = secondary_nodes[0]
8236 if self.lu.op.iallocator or (self.lu.op.target_node and
8237 self.lu.op.target_node != target_node):
8239 text = "failed over"
8242 raise errors.OpPrereqError("Instances with disk template %s cannot"
8243 " be %s to arbitrary nodes"
8244 " (neither an iallocator nor a target"
8245 " node can be passed)" %
8246 (instance.disk_template, text),
8248 nodeinfo = self.cfg.GetNodeInfo(target_node)
8249 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8250 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8252 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8253 ignore=self.ignore_ipolicy)
8255 i_be = cluster.FillBE(instance)
8257 # check memory requirements on the secondary node
8258 if (not self.cleanup and
8259 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8260 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8261 "migrating instance %s" %
8263 i_be[constants.BE_MINMEM],
8264 instance.hypervisor)
8266 self.lu.LogInfo("Not checking memory on the secondary node as"
8267 " instance will not be started")
8269 # check if failover must be forced instead of migration
8270 if (not self.cleanup and not self.failover and
8271 i_be[constants.BE_ALWAYS_FAILOVER]):
8272 self.lu.LogInfo("Instance configured to always failover; fallback"
8274 self.failover = True
8276 # check bridge existence
8277 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8279 if not self.cleanup:
8280 _CheckNodeNotDrained(self.lu, target_node)
8281 if not self.failover:
8282 result = self.rpc.call_instance_migratable(instance.primary_node,
8284 if result.fail_msg and self.fallback:
8285 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8287 self.failover = True
8289 result.Raise("Can't migrate, please use failover",
8290 prereq=True, ecode=errors.ECODE_STATE)
8292 assert not (self.failover and self.cleanup)
8294 if not self.failover:
8295 if self.lu.op.live is not None and self.lu.op.mode is not None:
8296 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8297 " parameters are accepted",
8299 if self.lu.op.live is not None:
8301 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8303 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8304 # reset the 'live' parameter to None so that repeated
8305 # invocations of CheckPrereq do not raise an exception
8306 self.lu.op.live = None
8307 elif self.lu.op.mode is None:
8308 # read the default value from the hypervisor
8309 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8310 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8312 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8314 # Failover is never live
8317 if not (self.failover or self.cleanup):
8318 remote_info = self.rpc.call_instance_info(instance.primary_node,
8320 instance.hypervisor)
8321 remote_info.Raise("Error checking instance on node %s" %
8322 instance.primary_node)
8323 instance_running = bool(remote_info.payload)
8324 if instance_running:
8325 self.current_mem = int(remote_info.payload["memory"])
8327 def _RunAllocator(self):
8328 """Run the allocator based on input opcode.
8331 # FIXME: add a self.ignore_ipolicy option
8332 req = iallocator.IAReqRelocate(name=self.instance_name,
8333 relocate_from=[self.instance.primary_node])
8334 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8336 ial.Run(self.lu.op.iallocator)
8339 raise errors.OpPrereqError("Can't compute nodes using"
8340 " iallocator '%s': %s" %
8341 (self.lu.op.iallocator, ial.info),
8343 self.target_node = ial.result[0]
8344 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.instance_name, self.lu.op.iallocator,
8346 utils.CommaJoin(ial.result))
8348 def _WaitUntilSync(self):
8349 """Poll with custom rpc for disk sync.
8351 This uses our own step-based rpc call.
8354 self.feedback_fn("* wait until resync is done")
8358 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8360 (self.instance.disks,
8363 for node, nres in result.items():
8364 nres.Raise("Cannot resync disks on node %s" % node)
8365 node_done, node_percent = nres.payload
8366 all_done = all_done and node_done
8367 if node_percent is not None:
8368 min_percent = min(min_percent, node_percent)
8370 if min_percent < 100:
8371 self.feedback_fn(" - progress: %.1f%%" % min_percent)
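# For illustration, the aggregation done in _WaitUntilSync above: the wait
# ends only when every node reports its disks as done, and the progress shown
# to the user is the minimum per-node percentage (payload values hypothetical):
#
#   payloads = {"node1": (False, 87.5), "node2": (True, 100.0)}
#   all_done = all(done for (done, _) in payloads.values())           # False
#   min_percent = min(pct for (_, pct) in payloads.values()
#                     if pct is not None)                             # 87.5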
8374 def _EnsureSecondary(self, node):
8375 """Demote a node to secondary.
8378 self.feedback_fn("* switching node %s to secondary mode" % node)
8380 for dev in self.instance.disks:
8381 self.cfg.SetDiskID(dev, node)
8383 result = self.rpc.call_blockdev_close(node, self.instance.name,
8384 self.instance.disks)
8385 result.Raise("Cannot change disk to secondary on node %s" % node)
8387 def _GoStandalone(self):
8388 """Disconnect from the network.
8391 self.feedback_fn("* changing into standalone mode")
8392 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8393 self.instance.disks)
8394 for node, nres in result.items():
8395 nres.Raise("Cannot disconnect disks on node %s" % node)
8397 def _GoReconnect(self, multimaster):
8398 """Reconnect to the network.
8404 msg = "single-master"
8405 self.feedback_fn("* changing disks into %s mode" % msg)
8406 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8407 (self.instance.disks, self.instance),
8408 self.instance.name, multimaster)
8409 for node, nres in result.items():
8410 nres.Raise("Cannot change disks config on node %s" % node)
8412 def _ExecCleanup(self):
8413 """Try to cleanup after a failed migration.
8415 The cleanup is done by:
8416 - check that the instance is running only on one node
8417 (and update the config if needed)
8418 - change disks on its secondary node to secondary
8419 - wait until disks are fully synchronized
8420 - disconnect from the network
8421 - change disks into single-master mode
8422 - wait again until disks are fully synchronized
8425 instance = self.instance
8426 target_node = self.target_node
8427 source_node = self.source_node
8429 # check running on only one node
8430 self.feedback_fn("* checking where the instance actually runs"
8431 " (if this hangs, the hypervisor might be in"
8433 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8434 for node, result in ins_l.items():
8435 result.Raise("Can't contact node %s" % node)
8437 runningon_source = instance.name in ins_l[source_node].payload
8438 runningon_target = instance.name in ins_l[target_node].payload
8440 if runningon_source and runningon_target:
8441 raise errors.OpExecError("Instance seems to be running on two nodes,"
8442 " or the hypervisor is confused; you will have"
8443 " to ensure manually that it runs only on one"
8444 " and restart this operation")
8446 if not (runningon_source or runningon_target):
8447 raise errors.OpExecError("Instance does not seem to be running at all;"
8448 " in this case it's safer to repair by"
8449 " running 'gnt-instance stop' to ensure disk"
8450 " shutdown, and then restarting it")
8452 if runningon_target:
8453 # the migration has actually succeeded, we need to update the config
8454 self.feedback_fn("* instance running on secondary node (%s),"
8455 " updating config" % target_node)
8456 instance.primary_node = target_node
8457 self.cfg.Update(instance, self.feedback_fn)
8458 demoted_node = source_node
8460 self.feedback_fn("* instance confirmed to be running on its"
8461 " primary node (%s)" % source_node)
8462 demoted_node = target_node
8464 if instance.disk_template in constants.DTS_INT_MIRROR:
8465 self._EnsureSecondary(demoted_node)
8467 self._WaitUntilSync()
8468 except errors.OpExecError:
8469 # we ignore here errors, since if the device is standalone, it
8470 # won't be able to sync
8472 self._GoStandalone()
8473 self._GoReconnect(False)
8474 self._WaitUntilSync()
8476 self.feedback_fn("* done")
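# For illustration, the four situations _ExecCleanup distinguishes above,
# based on where the instance is actually found running:
#
#   on source | on target | action
#   ----------+-----------+-------------------------------------------------
#   yes       | yes       | raise an error; manual intervention required
#   no        | no        | raise an error; stop and restart the instance
#   no        | yes       | migration did succeed: make the target the new
#             |           | primary and demote the source node
#   yes       | no        | config already correct: demote the target node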
8478 def _RevertDiskStatus(self):
8479 """Try to revert the disk status after a failed migration.
8482 target_node = self.target_node
8483 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(target_node)
8488 self._GoStandalone()
8489 self._GoReconnect(False)
8490 self._WaitUntilSync()
8491 except errors.OpExecError, err:
8492 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8493 " please try to recover the instance manually;"
8494 " error '%s'" % str(err))
8496 def _AbortMigration(self):
8497 """Call the hypervisor code to abort a started migration.
8500 instance = self.instance
8501 target_node = self.target_node
8502 source_node = self.source_node
8503 migration_info = self.migration_info
8505 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8509 abort_msg = abort_result.fail_msg
8511 logging.error("Aborting migration failed on target node %s: %s",
8512 target_node, abort_msg)
8513 # Don't raise an exception here, as we still have to try to revert the
8514 # disk status, even if this step failed.
8516 abort_result = self.rpc.call_instance_finalize_migration_src(
8517 source_node, instance, False, self.live)
8518 abort_msg = abort_result.fail_msg
8520 logging.error("Aborting migration failed on source node %s: %s",
8521 source_node, abort_msg)
8523 def _ExecMigration(self):
8524 """Migrate an instance.
8526 The migrate is done by:
8527 - change the disks into dual-master mode
8528 - wait until disks are fully synchronized again
8529 - migrate the instance
8530 - change disks on the new secondary node (the old primary) to secondary
8531 - wait until disks are fully synchronized
8532 - change disks into single-master mode
8535 instance = self.instance
8536 target_node = self.target_node
8537 source_node = self.source_node
8539 # Check for hypervisor version mismatch and warn the user.
8540 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8541 None, [self.instance.hypervisor])
8542 for ninfo in nodeinfo.values():
8543 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8545 (_, _, (src_info, )) = nodeinfo[source_node].payload
8546 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8548 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8549 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8550 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8551 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8552 if src_version != dst_version:
8553 self.feedback_fn("* warning: hypervisor version mismatch between"
8554 " source (%s) and target (%s) node" %
8555 (src_version, dst_version))
8557 self.feedback_fn("* checking disk consistency between source and target")
8558 for (idx, dev) in enumerate(instance.disks):
8559 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8560 raise errors.OpExecError("Disk %s is degraded or not fully"
8561 " synchronized on target node,"
8562 " aborting migration" % idx)
8564 if self.current_mem > self.tgt_free_mem:
8565 if not self.allow_runtime_changes:
8566 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8567 " free memory to fit instance %s on target"
8568 " node %s (have %dMB, need %dMB)" %
8569 (instance.name, target_node,
8570 self.tgt_free_mem, self.current_mem))
8571 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8572 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8575 rpcres.Raise("Cannot modify instance runtime memory")
8577 # First get the migration information from the remote node
8578 result = self.rpc.call_migration_info(source_node, instance)
8579 msg = result.fail_msg
8581 log_err = ("Failed fetching source migration information from %s: %s" %
8583 logging.error(log_err)
8584 raise errors.OpExecError(log_err)
8586 self.migration_info = migration_info = result.payload
8588 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8589 # Then switch the disks to master/master mode
8590 self._EnsureSecondary(target_node)
8591 self._GoStandalone()
8592 self._GoReconnect(True)
8593 self._WaitUntilSync()
8595 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8596 result = self.rpc.call_accept_instance(target_node,
8599 self.nodes_ip[target_node])
8601 msg = result.fail_msg
8603 logging.error("Instance pre-migration failed, trying to revert"
8604 " disk status: %s", msg)
8605 self.feedback_fn("Pre-migration failed, aborting")
8606 self._AbortMigration()
8607 self._RevertDiskStatus()
8608 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8609 (instance.name, msg))
8611 self.feedback_fn("* migrating instance to %s" % target_node)
8612 result = self.rpc.call_instance_migrate(source_node, instance,
8613 self.nodes_ip[target_node],
8615 msg = result.fail_msg
8617 logging.error("Instance migration failed, trying to revert"
8618 " disk status: %s", msg)
8619 self.feedback_fn("Migration failed, aborting")
8620 self._AbortMigration()
8621 self._RevertDiskStatus()
8622 raise errors.OpExecError("Could not migrate instance %s: %s" %
8623 (instance.name, msg))
8625 self.feedback_fn("* starting memory transfer")
8626 last_feedback = time.time()
8628 result = self.rpc.call_instance_get_migration_status(source_node,
8630 msg = result.fail_msg
8631 ms = result.payload # MigrationStatus instance
8632 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8633 logging.error("Instance migration failed, trying to revert"
8634 " disk status: %s", msg)
8635 self.feedback_fn("Migration failed, aborting")
8636 self._AbortMigration()
8637 self._RevertDiskStatus()
8639 msg = "hypervisor returned failure"
8640 raise errors.OpExecError("Could not migrate instance %s: %s" %
8641 (instance.name, msg))
8643 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8644 self.feedback_fn("* memory transfer complete")
8647 if (utils.TimeoutExpired(last_feedback,
8648 self._MIGRATION_FEEDBACK_INTERVAL) and
8649 ms.transferred_ram is not None):
8650 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8651 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8652 last_feedback = time.time()
8654 time.sleep(self._MIGRATION_POLL_INTERVAL)
8656 result = self.rpc.call_instance_finalize_migration_src(source_node,
8660 msg = result.fail_msg
8662 logging.error("Instance migration succeeded, but finalization failed"
8663 " on the source node: %s", msg)
8664 raise errors.OpExecError("Could not finalize instance migration: %s" %
8667 instance.primary_node = target_node
8669 # distribute new instance config to the other nodes
8670 self.cfg.Update(instance, self.feedback_fn)
8672 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8676 msg = result.fail_msg
8678 logging.error("Instance migration succeeded, but finalization failed"
8679 " on the target node: %s", msg)
8680 raise errors.OpExecError("Could not finalize instance migration: %s" %
8683 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8684 self._EnsureSecondary(source_node)
8685 self._WaitUntilSync()
8686 self._GoStandalone()
8687 self._GoReconnect(False)
8688 self._WaitUntilSync()
8690 # If the instance's disk template is `rbd' and there was a successful
8691 # migration, unmap the device from the source node.
8692 if self.instance.disk_template == constants.DT_RBD:
8693 disks = _ExpandCheckDisks(instance, instance.disks)
8694 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8696 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8697 msg = result.fail_msg
8699 logging.error("Migration was successful, but couldn't unmap the"
8700 " block device %s on source node %s: %s",
8701 disk.iv_name, source_node, msg)
8702 logging.error("You need to unmap the device %s manually on %s",
8703 disk.iv_name, source_node)
8705 self.feedback_fn("* done")
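# For illustration, the shape of the memory-transfer loop above: the
# migration status is polled every _MIGRATION_POLL_INTERVAL seconds, progress
# is reported at most every _MIGRATION_FEEDBACK_INTERVAL seconds, and the
# loop ends once the status is no longer HV_MIGRATION_ACTIVE.  The reported
# percentage is computed as (values hypothetical):
#
#   mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
#   # e.g. transferred_ram=1536, total_ram=2048  ->  75.00 %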
8707 def _ExecFailover(self):
8708 """Failover an instance.
8710 The failover is done by shutting it down on its present node and
8711 starting it on the secondary.
8714 instance = self.instance
8715 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8717 source_node = instance.primary_node
8718 target_node = self.target_node
8720 if instance.admin_state == constants.ADMINST_UP:
8721 self.feedback_fn("* checking disk consistency between source and target")
8722 for (idx, dev) in enumerate(instance.disks):
8723 # for drbd, these are drbd over lvm
8724 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8726 if primary_node.offline:
8727 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8729 (primary_node.name, idx, target_node))
8730 elif not self.ignore_consistency:
8731 raise errors.OpExecError("Disk %s is degraded on target node,"
8732 " aborting failover" % idx)
8734 self.feedback_fn("* not checking disk consistency as instance is not"
8737 self.feedback_fn("* shutting down instance on source node")
8738 logging.info("Shutting down instance %s on node %s",
8739 instance.name, source_node)
8741 result = self.rpc.call_instance_shutdown(source_node, instance,
8742 self.shutdown_timeout)
8743 msg = result.fail_msg
8745 if self.ignore_consistency or primary_node.offline:
8746 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8747 " proceeding anyway; please make sure node"
8748 " %s is down; error details: %s",
8749 instance.name, source_node, source_node, msg)
8751 raise errors.OpExecError("Could not shutdown instance %s on"
8753 (instance.name, source_node, msg))
8755 self.feedback_fn("* deactivating the instance's disks on source node")
8756 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8757 raise errors.OpExecError("Can't shut down the instance's disks")
8759 instance.primary_node = target_node
8760 # distribute new instance config to the other nodes
8761 self.cfg.Update(instance, self.feedback_fn)
8763 # Only start the instance if it's marked as up
8764 if instance.admin_state == constants.ADMINST_UP:
8765 self.feedback_fn("* activating the instance's disks on target node %s" %
8767 logging.info("Starting instance %s on node %s",
8768 instance.name, target_node)
8770 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8771 ignore_secondaries=True)
8773 _ShutdownInstanceDisks(self.lu, instance)
8774 raise errors.OpExecError("Can't activate the instance's disks")
8776 self.feedback_fn("* starting the instance on the target node %s" %
8778 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8780 msg = result.fail_msg
8782 _ShutdownInstanceDisks(self.lu, instance)
8783 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8784 (instance.name, target_node, msg))
8786 def Exec(self, feedback_fn):
8787 """Perform the migration.
8790 self.feedback_fn = feedback_fn
8791 self.source_node = self.instance.primary_node
8793 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8794 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8795 self.target_node = self.instance.secondary_nodes[0]
8796 # Otherwise self.target_node has been populated either
8797 # directly, or through an iallocator.
8799 self.all_nodes = [self.source_node, self.target_node]
8800 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8801 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8804 feedback_fn("Failover instance %s" % self.instance.name)
8805 self._ExecFailover()
8807 feedback_fn("Migrating instance %s" % self.instance.name)
8810 return self._ExecCleanup()
8812 return self._ExecMigration()
8815 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8817 """Wrapper around L{_CreateBlockDevInner}.
8819 This method annotates the root device first.
8822 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8823 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8827 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8829 """Create a tree of block devices on a given node.
8831 If this device type has to be created on secondaries, create it and
8834 If not, just recurse to children keeping the same 'force' value.
8836 @attention: The device has to be annotated already.
8838 @param lu: the lu on whose behalf we execute
8839 @param node: the node on which to create the device
8840 @type instance: L{objects.Instance}
8841 @param instance: the instance which owns the device
8842 @type device: L{objects.Disk}
8843 @param device: the device to create
8844 @type force_create: boolean
8845 @param force_create: whether to force creation of this device; this
8846 will be changed to True whenever we find a device whose
8847 CreateOnSecondary() method returns True
8848 @param info: the extra 'metadata' we should attach to the device
8849 (this will be represented as a LVM tag)
8850 @type force_open: boolean
8851 @param force_open: this parameter will be passed to the
8852 L{backend.BlockdevCreate} function where it specifies
8853 whether we run on primary or not, and it affects both
8854 the child assembly and the device's own Open() execution
8857 if device.CreateOnSecondary():
8861 for child in device.children:
8862 _CreateBlockDevInner(lu, node, instance, child, force_create,
8865 if not force_create:
8868 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
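# A minimal sketch (not the real implementation) of the recursion in
# _CreateBlockDevInner above: 'force_create' turns True as soon as a device
# that must exist on secondaries is seen, is propagated to all children
# first, and only then is the device itself created.
#
#   def _create_tree(dev, force_create):
#     if dev.CreateOnSecondary():
#       force_create = True
#     for child in dev.children:
#       _create_tree(child, force_create)
#     if force_create:
#       _create_single(dev)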
8871 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8872 """Create a single block device on a given node.
8874 This will not recurse over children of the device, so they must be
8877 @param lu: the lu on whose behalf we execute
8878 @param node: the node on which to create the device
8879 @type instance: L{objects.Instance}
8880 @param instance: the instance which owns the device
8881 @type device: L{objects.Disk}
8882 @param device: the device to create
8883 @param info: the extra 'metadata' we should attach to the device
8884 (this will be represented as a LVM tag)
8885 @type force_open: boolean
8886 @param force_open: this parameter will be passed to the
8887 L{backend.BlockdevCreate} function where it specifies
8888 whether we run on primary or not, and it affects both
8889 the child assembly and the device's own Open() execution
8892 lu.cfg.SetDiskID(device, node)
8893 result = lu.rpc.call_blockdev_create(node, device, device.size,
8894 instance.name, force_open, info)
8895 result.Raise("Can't create block device %s on"
8896 " node %s for instance %s" % (device, node, instance.name))
8897 if device.physical_id is None:
8898 device.physical_id = result.payload
8901 def _GenerateUniqueNames(lu, exts):
8902 """Generate a suitable LV name.
8904 This will generate a unique logical volume name for each of the given extensions.
8909 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8910 results.append("%s%s" % (new_id, val))
8914 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8915 iv_name, p_minor, s_minor):
8916 """Generate a drbd8 device complete with its children.
8919 assert len(vgnames) == len(names) == 2
8920 port = lu.cfg.AllocatePort()
8921 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8923 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8924 logical_id=(vgnames[0], names[0]),
8926 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8927 size=constants.DRBD_META_SIZE,
8928 logical_id=(vgnames[1], names[1]),
8930 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8931 logical_id=(primary, secondary, port,
8934 children=[dev_data, dev_meta],
8935 iv_name=iv_name, params={})
8939 _DISK_TEMPLATE_NAME_PREFIX = {
8940 constants.DT_PLAIN: "",
8941 constants.DT_RBD: ".rbd",
8945 _DISK_TEMPLATE_DEVICE_TYPE = {
8946 constants.DT_PLAIN: constants.LD_LV,
8947 constants.DT_FILE: constants.LD_FILE,
8948 constants.DT_SHARED_FILE: constants.LD_FILE,
8949 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8950 constants.DT_RBD: constants.LD_RBD,
8954 def _GenerateDiskTemplate(
8955 lu, template_name, instance_name, primary_node, secondary_nodes,
8956 disk_info, file_storage_dir, file_driver, base_index,
8957 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8958 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8959 """Generate the entire disk layout for a given template type.
8962 #TODO: compute space requirements
8964 vgname = lu.cfg.GetVGName()
8965 disk_count = len(disk_info)
8968 if template_name == constants.DT_DISKLESS:
8970 elif template_name == constants.DT_DRBD8:
8971 if len(secondary_nodes) != 1:
8972 raise errors.ProgrammerError("Wrong template configuration")
8973 remote_node = secondary_nodes[0]
8974 minors = lu.cfg.AllocateDRBDMinor(
8975 [primary_node, remote_node] * len(disk_info), instance_name)
8977 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8979 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8982 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8983 for i in range(disk_count)]):
8984 names.append(lv_prefix + "_data")
8985 names.append(lv_prefix + "_meta")
8986 for idx, disk in enumerate(disk_info):
8987 disk_index = idx + base_index
8988 data_vg = disk.get(constants.IDISK_VG, vgname)
8989 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8990 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8991 disk[constants.IDISK_SIZE],
8993 names[idx * 2:idx * 2 + 2],
8994 "disk/%d" % disk_index,
8995 minors[idx * 2], minors[idx * 2 + 1])
8996 disk_dev.mode = disk[constants.IDISK_MODE]
8997 disks.append(disk_dev)
9000 raise errors.ProgrammerError("Wrong template configuration")
9002 if template_name == constants.DT_FILE:
9004 elif template_name == constants.DT_SHARED_FILE:
9005 _req_shr_file_storage()
9007 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9008 if name_prefix is None:
9011 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9012 (name_prefix, base_index + i)
9013 for i in range(disk_count)])
9015 if template_name == constants.DT_PLAIN:
9016 def logical_id_fn(idx, _, disk):
9017 vg = disk.get(constants.IDISK_VG, vgname)
9018 return (vg, names[idx])
9019 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9021 lambda _, disk_index, disk: (file_driver,
9022 "%s/disk%d" % (file_storage_dir,
9024 elif template_name == constants.DT_BLOCK:
9026 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9027 disk[constants.IDISK_ADOPT])
9028 elif template_name == constants.DT_RBD:
9029 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9031 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9033 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9035 for idx, disk in enumerate(disk_info):
9036 disk_index = idx + base_index
9037 size = disk[constants.IDISK_SIZE]
9038 feedback_fn("* disk %s, size %s" %
9039 (disk_index, utils.FormatUnit(size, "h")))
9040 disks.append(objects.Disk(dev_type=dev_type, size=size,
9041 logical_id=logical_id_fn(idx, disk_index, disk),
9042 iv_name="disk/%d" % disk_index,
9043 mode=disk[constants.IDISK_MODE],
9049 def _GetInstanceInfoText(instance):
9050 """Compute that text that should be added to the disk's metadata.
9053 return "originstname+%s" % instance.name
9056 def _CalcEta(time_taken, written, total_size):
9057 """Calculates the ETA based on size written and total size.
9059 @param time_taken: The time taken so far
9060 @param written: amount written so far
9061 @param total_size: The total size of data to be written
9062 @return: The remaining time in seconds
9065 avg_time = time_taken / float(written)
9066 return (total_size - written) * avg_time
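# For illustration, _CalcEta is a plain linear extrapolation, e.g.:
#
#   _CalcEta(20.0, 1024, 10240)
#   # avg_time = 20.0 / 1024, remaining = (10240 - 1024) * avg_time = 180.0 s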
9069 def _WipeDisks(lu, instance, disks=None):
9070 """Wipes instance disks.
9072 @type lu: L{LogicalUnit}
9073 @param lu: the logical unit on whose behalf we execute
9074 @type instance: L{objects.Instance}
9075 @param instance: the instance whose disks we should wipe
9076 @return: the success of the wipe
9079 node = instance.primary_node
9082 disks = [(idx, disk, 0)
9083 for (idx, disk) in enumerate(instance.disks)]
9085 for (_, device, _) in disks:
9086 lu.cfg.SetDiskID(device, node)
9088 logging.info("Pausing synchronization of disks of instance '%s'",
9090 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9091 (map(compat.snd, disks),
9094 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9096 for idx, success in enumerate(result.payload):
9098 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9099 " failed", idx, instance.name)
9102 for (idx, device, offset) in disks:
9103 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9104 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9106 int(min(constants.MAX_WIPE_CHUNK,
9107 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9111 start_time = time.time()
9116 info_text = (" (from %s to %s)" %
9117 (utils.FormatUnit(offset, "h"),
9118 utils.FormatUnit(size, "h")))
9120 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9122 logging.info("Wiping disk %d for instance %s on node %s using"
9123 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9125 while offset < size:
9126 wipe_size = min(wipe_chunk_size, size - offset)
9128 logging.debug("Wiping disk %d, offset %s, chunk %s",
9129 idx, offset, wipe_size)
9131 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9133 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9134 (idx, offset, wipe_size))
9138 if now - last_output >= 60:
9139 eta = _CalcEta(now - start_time, offset, size)
9140 lu.LogInfo(" - done: %.1f%% ETA: %s",
9141 offset / float(size) * 100, utils.FormatSeconds(eta))
9144 logging.info("Resuming synchronization of disks for instance '%s'",
9147 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9148 (map(compat.snd, disks),
9153 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9154 node, result.fail_msg)
9156 for idx, success in enumerate(result.payload):
9158 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9159 " failed", idx, instance.name)
9162 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9163 """Create all disks for an instance.
9165 This abstracts away some work from AddInstance.
9167 @type lu: L{LogicalUnit}
9168 @param lu: the logical unit on whose behalf we execute
9169 @type instance: L{objects.Instance}
9170 @param instance: the instance whose disks we should create
9172 @param to_skip: list of indices to skip
9173 @type target_node: string
9174 @param target_node: if passed, overrides the target node for creation
9176 @return: the success of the creation
9179 info = _GetInstanceInfoText(instance)
9180 if target_node is None:
9181 pnode = instance.primary_node
9182 all_nodes = instance.all_nodes
9187 if instance.disk_template in constants.DTS_FILEBASED:
9188 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9189 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9191 result.Raise("Failed to create directory '%s' on"
9192 " node %s" % (file_storage_dir, pnode))
9194 # Note: this needs to be kept in sync with adding of disks in
9195 # LUInstanceSetParams
9196 for idx, device in enumerate(instance.disks):
9197 if to_skip and idx in to_skip:
9199 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9201 for node in all_nodes:
9202 f_create = node == pnode
9203 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9206 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9207 """Remove all disks for an instance.
9209 This abstracts away some work from `AddInstance()` and
9210 `RemoveInstance()`. Note that in case some of the devices couldn't
9211 be removed, the removal will continue with the other ones (compare
9212 with `_CreateDisks()`).
9214 @type lu: L{LogicalUnit}
9215 @param lu: the logical unit on whose behalf we execute
9216 @type instance: L{objects.Instance}
9217 @param instance: the instance whose disks we should remove
9218 @type target_node: string
9219 @param target_node: used to override the node on which to remove the disks
9221 @return: the success of the removal
9224 logging.info("Removing block devices for instance %s", instance.name)
9227 ports_to_release = set()
9228 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9229 for (idx, device) in enumerate(anno_disks):
9231 edata = [(target_node, device)]
9233 edata = device.ComputeNodeTree(instance.primary_node)
9234 for node, disk in edata:
9235 lu.cfg.SetDiskID(disk, node)
9236 result = lu.rpc.call_blockdev_remove(node, disk)
9238 lu.LogWarning("Could not remove disk %s on node %s,"
9239 " continuing anyway: %s", idx, node, result.fail_msg)
9240 if not (result.offline and node != instance.primary_node):
9243 # if this is a DRBD disk, return its port to the pool
9244 if device.dev_type in constants.LDS_DRBD:
9245 ports_to_release.add(device.logical_id[2])
9247 if all_result or ignore_failures:
9248 for port in ports_to_release:
9249 lu.cfg.AddTcpUdpPort(port)
9251 if instance.disk_template in constants.DTS_FILEBASED:
9252 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9256 tgt = instance.primary_node
9257 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9259 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9260 file_storage_dir, instance.primary_node, result.fail_msg)
9266 def _ComputeDiskSizePerVG(disk_template, disks):
9267 """Compute disk size requirements in the volume group
9270 def _compute(disks, payload):
9271 """Universal algorithm.
9276 vgs[disk[constants.IDISK_VG]] = \
9277 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9281 # Required free disk space as a function of disk and swap space
9283 constants.DT_DISKLESS: {},
9284 constants.DT_PLAIN: _compute(disks, 0),
9285 # 128 MB are added for drbd metadata for each disk
9286 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9287 constants.DT_FILE: {},
9288 constants.DT_SHARED_FILE: {},
9291 if disk_template not in req_size_dict:
9292 raise errors.ProgrammerError("Disk template '%s' size requirement"
9293 " is unknown" % disk_template)
9295 return req_size_dict[disk_template]
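# Editorial sketch (not part of Ganeti): the _compute helper above is a plain
# per-key fold -- sizes are summed per volume group, with a fixed per-disk
# overhead (e.g. constants.DRBD_META_SIZE for DRBD metadata).  Simplified disk
# dicts with literal "vg"/"size" keys are used here purely for illustration:
def _ExampleSumSizesPerVg(disks, overhead):
  """Sums disk sizes per volume group, adding a per-disk overhead.

  >>> _ExampleSumSizesPerVg([{"vg": "xenvg", "size": 1024},
  ...                        {"vg": "xenvg", "size": 2048}], 128)
  {'xenvg': 3328}

  """
  totals = {}
  for disk in disks:
    vg = disk["vg"]
    totals[vg] = totals.get(vg, 0) + disk["size"] + overhead
  return totals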
9298 def _FilterVmNodes(lu, nodenames):
9299 """Filters out non-vm_capable nodes from a list.
9301 @type lu: L{LogicalUnit}
9302 @param lu: the logical unit for which we check
9303 @type nodenames: list
9304 @param nodenames: the list of nodes on which we should check
9306 @return: the list of vm-capable nodes
9309 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9310 return [name for name in nodenames if name not in vm_nodes]
9313 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9314 """Hypervisor parameter validation.
9316 This function abstracts the hypervisor parameter validation to be
9317 used in both instance create and instance modify.
9319 @type lu: L{LogicalUnit}
9320 @param lu: the logical unit for which we check
9321 @type nodenames: list
9322 @param nodenames: the list of nodes on which we should check
9323 @type hvname: string
9324 @param hvname: the name of the hypervisor we should use
9325 @type hvparams: dict
9326 @param hvparams: the parameters which we need to check
9327 @raise errors.OpPrereqError: if the parameters are not valid
9330 nodenames = _FilterVmNodes(lu, nodenames)
9332 cluster = lu.cfg.GetClusterInfo()
9333 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9335 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9336 for node in nodenames:
9340 info.Raise("Hypervisor parameter validation failed on node %s" % node)
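# Editorial sketch (not part of Ganeti): objects.FillDict overlays the given
# hvparams on top of the cluster-level defaults, so only explicitly supplied
# values differ from the base dict before the per-node RPC validation runs.
# Roughly equivalent pure-Python behaviour, shown with hypothetical values:
def _ExampleFillDict(defaults, overrides):
  """Returns a copy of defaults with overrides applied on top."""
  filled = dict(defaults)
  filled.update(overrides)
  return filled
# e.g. _ExampleFillDict({"acpi": True, "pae": True}, {"acpi": False})
# yields {"acpi": False, "pae": True} -- defaults survive unless overridden.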
9343 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9344 """OS parameters validation.
9346 @type lu: L{LogicalUnit}
9347 @param lu: the logical unit for which we check
9348 @type required: boolean
9349 @param required: whether the validation should fail if the OS is not found
9351 @type nodenames: list
9352 @param nodenames: the list of nodes on which we should check
9353 @type osname: string
9354 @param osname: the name of the OS we should use
9355 @type osparams: dict
9356 @param osparams: the parameters which we need to check
9357 @raise errors.OpPrereqError: if the parameters are not valid
9360 nodenames = _FilterVmNodes(lu, nodenames)
9361 result = lu.rpc.call_os_validate(nodenames, required, osname,
9362 [constants.OS_VALIDATE_PARAMETERS],
9364 for node, nres in result.items():
9365 # we don't check for offline cases since this should be run only
9366 # against the master node and/or an instance's nodes
9367 nres.Raise("OS Parameters validation failed on node %s" % node)
9368 if not nres.payload:
9369 lu.LogInfo("OS %s not found on node %s, validation skipped",
9373 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9374 """Wrapper around IAReqInstanceAlloc.
9376 @param op: The instance opcode
9377 @param disks: The computed disks
9378 @param nics: The computed nics
9379 @param beparams: The fully filled beparams
9381 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9384 spindle_use = beparams[constants.BE_SPINDLE_USE]
9385 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9386 disk_template=op.disk_template,
9389 vcpus=beparams[constants.BE_VCPUS],
9390 memory=beparams[constants.BE_MAXMEM],
9391 spindle_use=spindle_use,
9393 nics=[n.ToDict() for n in nics],
9394 hypervisor=op.hypervisor)
9397 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9398 """Computes the nics.
9400 @param op: The instance opcode
9401 @param cluster: Cluster configuration object
9402 @param default_ip: The default ip to assign
9403 @param cfg: An instance of the configuration object
9404 @param proc: The executer instance
9406 @returns: The built NIC objects
9410 for idx, nic in enumerate(op.nics):
9411 nic_mode_req = nic.get(constants.INIC_MODE, None)
9412 nic_mode = nic_mode_req
9413 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9414 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9416 net = nic.get(constants.INIC_NETWORK, None)
9417 link = nic.get(constants.NIC_LINK, None)
9418 ip = nic.get(constants.INIC_IP, None)
9420 if net is None or net.lower() == constants.VALUE_NONE:
9423 if nic_mode_req is not None or link is not None:
9424 raise errors.OpPrereqError("If network is given, no mode or link"
9425 " is allowed to be passed",
9428 # ip validity checks
9429 if ip is None or ip.lower() == constants.VALUE_NONE:
9431 elif ip.lower() == constants.VALUE_AUTO:
9432 if not op.name_check:
9433 raise errors.OpPrereqError("IP address set to auto but name checks"
9434 " have been skipped",
9438 # We defer pool operations until later, so that the iallocator has
9439 # filled in the instance's node(s)
9440 if ip.lower() == constants.NIC_IP_POOL:
9442 raise errors.OpPrereqError("if ip=pool, parameter network"
9443 " must be passed too",
9446 elif not netutils.IPAddress.IsValid(ip):
9447 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9452 # TODO: check the ip address for uniqueness
9453 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9454 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9457 # MAC address verification
9458 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9459 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9460 mac = utils.NormalizeAndValidateMac(mac)
9463 # TODO: We need to factor this out
9464 cfg.ReserveMAC(mac, proc.GetECId())
9465 except errors.ReservationError:
9466 raise errors.OpPrereqError("MAC address %s already in use"
9467 " in cluster" % mac,
9468 errors.ECODE_NOTUNIQUE)
9470 # Build nic parameters
9473 nicparams[constants.NIC_MODE] = nic_mode
9475 nicparams[constants.NIC_LINK] = link
9477 check_params = cluster.SimpleFillNIC(nicparams)
9478 objects.NIC.CheckParameterSyntax(check_params)
9479 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
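# Editorial sketch (not part of Ganeti): the IP handling above boils down to a
# small decision table.  A simplified standalone version (the real code defers
# pool allocation until the node is known and validates literal addresses with
# netutils.IPAddress.IsValid):
def _ExampleResolveNicIp(ip_spec, default_ip, have_network, name_checked):
  """Returns the IP to use for a NIC, or None, for a given specification."""
  if ip_spec is None or ip_spec.lower() == "none":
    return None                        # explicitly no IP address
  if ip_spec.lower() == "auto":
    if not name_checked:
      raise ValueError("ip=auto requires the instance name check")
    return default_ip                  # IP resolved from the instance name
  if ip_spec.lower() == "pool":
    if not have_network:
      raise ValueError("ip=pool requires a network")
    return "pool"                      # the real allocation happens later
  return ip_spec                       # assume a literal, pre-validated IP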
9484 def _ComputeDisks(op, default_vg):
9485 """Computes the instance disks.
9487 @param op: The instance opcode
9488 @param default_vg: The default_vg to assume
9490 @return: The computed disks
9494 for disk in op.disks:
9495 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9496 if mode not in constants.DISK_ACCESS_SET:
9497 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9498 mode, errors.ECODE_INVAL)
9499 size = disk.get(constants.IDISK_SIZE, None)
9501 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9504 except (TypeError, ValueError):
9505 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9508 data_vg = disk.get(constants.IDISK_VG, default_vg)
9510 constants.IDISK_SIZE: size,
9511 constants.IDISK_MODE: mode,
9512 constants.IDISK_VG: data_vg,
9514 if constants.IDISK_METAVG in disk:
9515 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9516 if constants.IDISK_ADOPT in disk:
9517 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9518 disks.append(new_disk)
9523 def _ComputeFullBeParams(op, cluster):
9524 """Computes the full beparams.
9526 @param op: The instance opcode
9527 @param cluster: The cluster config object
9529 @return: The fully filled beparams
9532 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9533 for param, value in op.beparams.iteritems():
9534 if value == constants.VALUE_AUTO:
9535 op.beparams[param] = default_beparams[param]
9536 objects.UpgradeBeParams(op.beparams)
9537 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9538 return cluster.SimpleFillBE(op.beparams)
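# Editorial sketch (not part of Ganeti): "auto" backend parameters are replaced
# by the cluster defaults before the dict is upgraded, type-checked and filled.
# The substitution step in isolation, with hypothetical parameter names:
def _ExampleResolveAutoParams(params, defaults):
  """Replaces 'auto' values with the corresponding default values."""
  resolved = dict(params)
  for name, value in resolved.items():
    if value == "auto":
      resolved[name] = defaults[name]
  return resolved
# e.g. _ExampleResolveAutoParams({"vcpus": "auto", "maxmem": 512},
#                                {"vcpus": 1, "maxmem": 128})
# yields {"vcpus": 1, "maxmem": 512}.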
9541 class LUInstanceCreate(LogicalUnit):
9542 """Create an instance.
9545 HPATH = "instance-add"
9546 HTYPE = constants.HTYPE_INSTANCE
9549 def CheckArguments(self):
9553 # do not require name_check to ease forward/backward compatibility
9555 if self.op.no_install and self.op.start:
9556 self.LogInfo("No-installation mode selected, disabling startup")
9557 self.op.start = False
9558 # validate/normalize the instance name
9559 self.op.instance_name = \
9560 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9562 if self.op.ip_check and not self.op.name_check:
9563 # TODO: make the ip check more flexible and not depend on the name check
9564 raise errors.OpPrereqError("Cannot do IP address check without a name"
9565 " check", errors.ECODE_INVAL)
9567 # check nics' parameter names
9568 for nic in self.op.nics:
9569 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9571 # check disks. parameter names and consistent adopt/no-adopt strategy
9572 has_adopt = has_no_adopt = False
9573 for disk in self.op.disks:
9574 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9575 if constants.IDISK_ADOPT in disk:
9579 if has_adopt and has_no_adopt:
9580 raise errors.OpPrereqError("Either all disks are adopted or none is",
9583 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9584 raise errors.OpPrereqError("Disk adoption is not supported for the"
9585 " '%s' disk template" %
9586 self.op.disk_template,
9588 if self.op.iallocator is not None:
9589 raise errors.OpPrereqError("Disk adoption not allowed with an"
9590 " iallocator script", errors.ECODE_INVAL)
9591 if self.op.mode == constants.INSTANCE_IMPORT:
9592 raise errors.OpPrereqError("Disk adoption not allowed for"
9593 " instance import", errors.ECODE_INVAL)
9595 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9596 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9597 " but no 'adopt' parameter given" %
9598 self.op.disk_template,
9601 self.adopt_disks = has_adopt
9603 # instance name verification
9604 if self.op.name_check:
9605 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9606 self.op.instance_name = self.hostname1.name
9607 # used in CheckPrereq for ip ping check
9608 self.check_ip = self.hostname1.ip
9610 self.check_ip = None
9612 # file storage checks
9613 if (self.op.file_driver and
9614 self.op.file_driver not in constants.FILE_DRIVER):
9615 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9616 self.op.file_driver, errors.ECODE_INVAL)
9618 if self.op.disk_template == constants.DT_FILE:
9619 opcodes.RequireFileStorage()
9620 elif self.op.disk_template == constants.DT_SHARED_FILE:
9621 opcodes.RequireSharedFileStorage()
9623 ### Node/iallocator related checks
9624 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9626 if self.op.pnode is not None:
9627 if self.op.disk_template in constants.DTS_INT_MIRROR:
9628 if self.op.snode is None:
9629 raise errors.OpPrereqError("The networked disk templates need"
9630 " a mirror node", errors.ECODE_INVAL)
9632 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9634 self.op.snode = None
9636 self._cds = _GetClusterDomainSecret()
9638 if self.op.mode == constants.INSTANCE_IMPORT:
9639 # On import force_variant must be True, because if we forced it at
9640 # initial install, our only chance when importing it back is that it
9642 self.op.force_variant = True
9644 if self.op.no_install:
9645 self.LogInfo("No-installation mode has no effect during import")
9647 elif self.op.mode == constants.INSTANCE_CREATE:
9648 if self.op.os_type is None:
9649 raise errors.OpPrereqError("No guest OS specified",
9651 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9652 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9653 " installation" % self.op.os_type,
9655 if self.op.disk_template is None:
9656 raise errors.OpPrereqError("No disk template specified",
9659 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9660 # Check handshake to ensure both clusters have the same domain secret
9661 src_handshake = self.op.source_handshake
9662 if not src_handshake:
9663 raise errors.OpPrereqError("Missing source handshake",
9666 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9669 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9672 # Load and check source CA
9673 self.source_x509_ca_pem = self.op.source_x509_ca
9674 if not self.source_x509_ca_pem:
9675 raise errors.OpPrereqError("Missing source X509 CA",
9679 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9681 except OpenSSL.crypto.Error, err:
9682 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9683 (err, ), errors.ECODE_INVAL)
9685 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9686 if errcode is not None:
9687 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9690 self.source_x509_ca = cert
9692 src_instance_name = self.op.source_instance_name
9693 if not src_instance_name:
9694 raise errors.OpPrereqError("Missing source instance name",
9697 self.source_instance_name = \
9698 netutils.GetHostname(name=src_instance_name).name
9701 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9702 self.op.mode, errors.ECODE_INVAL)
9704 def ExpandNames(self):
9705 """ExpandNames for CreateInstance.
9707 Figure out the right locks for instance creation.
9710 self.needed_locks = {}
9712 instance_name = self.op.instance_name
9713 # this is just a preventive check, but someone might still add this
9714 # instance in the meantime, and creation will fail at lock-add time
9715 if instance_name in self.cfg.GetInstanceList():
9716 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9717 instance_name, errors.ECODE_EXISTS)
9719 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9721 if self.op.iallocator:
9722 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9723 # specifying a group on instance creation and then selecting nodes from
9725 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9726 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9728 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9729 nodelist = [self.op.pnode]
9730 if self.op.snode is not None:
9731 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9732 nodelist.append(self.op.snode)
9733 self.needed_locks[locking.LEVEL_NODE] = nodelist
9734 # Lock resources of instance's primary and secondary nodes (copy to
9735 # prevent accidental modification)
9736 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9738 # in case of import lock the source node too
9739 if self.op.mode == constants.INSTANCE_IMPORT:
9740 src_node = self.op.src_node
9741 src_path = self.op.src_path
9743 if src_path is None:
9744 self.op.src_path = src_path = self.op.instance_name
9746 if src_node is None:
9747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9748 self.op.src_node = None
9749 if os.path.isabs(src_path):
9750 raise errors.OpPrereqError("Importing an instance from an absolute path"
9751 " requires a source node option",
9754 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9755 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9756 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9757 if not os.path.isabs(src_path):
9758 self.op.src_path = src_path = \
9759 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9761 def _RunAllocator(self):
9762 """Run the allocator based on input opcode.
9765 #TODO Export network to iallocator so that it chooses a pnode
9766 # in a nodegroup that has the desired network connected to
9767 req = _CreateInstanceAllocRequest(self.op, self.disks,
9768 self.nics, self.be_full)
9769 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9771 ial.Run(self.op.iallocator)
9774 raise errors.OpPrereqError("Can't compute nodes using"
9775 " iallocator '%s': %s" %
9776 (self.op.iallocator, ial.info),
9778 self.op.pnode = ial.result[0]
9779 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9780 self.op.instance_name, self.op.iallocator,
9781 utils.CommaJoin(ial.result))
9783 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9785 if req.RequiredNodes() == 2:
9786 self.op.snode = ial.result[1]
9788 def BuildHooksEnv(self):
9791 This runs on master, primary and secondary nodes of the instance.
9795 "ADD_MODE": self.op.mode,
9797 if self.op.mode == constants.INSTANCE_IMPORT:
9798 env["SRC_NODE"] = self.op.src_node
9799 env["SRC_PATH"] = self.op.src_path
9800 env["SRC_IMAGES"] = self.src_images
9802 env.update(_BuildInstanceHookEnv(
9803 name=self.op.instance_name,
9804 primary_node=self.op.pnode,
9805 secondary_nodes=self.secondaries,
9806 status=self.op.start,
9807 os_type=self.op.os_type,
9808 minmem=self.be_full[constants.BE_MINMEM],
9809 maxmem=self.be_full[constants.BE_MAXMEM],
9810 vcpus=self.be_full[constants.BE_VCPUS],
9811 nics=_NICListToTuple(self, self.nics),
9812 disk_template=self.op.disk_template,
9813 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9814 for d in self.disks],
9817 hypervisor_name=self.op.hypervisor,
9823 def BuildHooksNodes(self):
9824 """Build hooks nodes.
9827 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9830 def _ReadExportInfo(self):
9831 """Reads the export information from disk.
9833 It will override the opcode source node and path with the actual
9834 information, if these two were not specified before.
9836 @return: the export information
9839 assert self.op.mode == constants.INSTANCE_IMPORT
9841 src_node = self.op.src_node
9842 src_path = self.op.src_path
9844 if src_node is None:
9845 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9846 exp_list = self.rpc.call_export_list(locked_nodes)
9848 for node in exp_list:
9849 if exp_list[node].fail_msg:
9851 if src_path in exp_list[node].payload:
9853 self.op.src_node = src_node = node
9854 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9858 raise errors.OpPrereqError("No export found for relative path %s" %
9859 src_path, errors.ECODE_INVAL)
9861 _CheckNodeOnline(self, src_node)
9862 result = self.rpc.call_export_info(src_node, src_path)
9863 result.Raise("No export or invalid export found in dir %s" % src_path)
9865 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9866 if not export_info.has_section(constants.INISECT_EXP):
9867 raise errors.ProgrammerError("Corrupted export config",
9868 errors.ECODE_ENVIRON)
9870 ei_version = export_info.get(constants.INISECT_EXP, "version")
9871 if (int(ei_version) != constants.EXPORT_VERSION):
9872 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9873 (ei_version, constants.EXPORT_VERSION),
9874 errors.ECODE_ENVIRON)
9877 def _ReadExportParams(self, einfo):
9878 """Use export parameters as defaults.
9880 In case the opcode doesn't specify (as in override) some instance
9881 parameters, then try to use them from the export information, if
9885 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9887 if self.op.disk_template is None:
9888 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9889 self.op.disk_template = einfo.get(constants.INISECT_INS,
9891 if self.op.disk_template not in constants.DISK_TEMPLATES:
9892 raise errors.OpPrereqError("Disk template specified in configuration"
9893 " file is not one of the allowed values:"
9895 " ".join(constants.DISK_TEMPLATES),
9898 raise errors.OpPrereqError("No disk template specified and the export"
9899 " is missing the disk_template information",
9902 if not self.op.disks:
9904 # TODO: import the disk iv_name too
9905 for idx in range(constants.MAX_DISKS):
9906 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9907 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9908 disks.append({constants.IDISK_SIZE: disk_sz})
9909 self.op.disks = disks
9910 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9911 raise errors.OpPrereqError("No disk info specified and the export"
9912 " is missing the disk information",
9915 if not self.op.nics:
9917 for idx in range(constants.MAX_NICS):
9918 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9920 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9921 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9928 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9929 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9931 if (self.op.hypervisor is None and
9932 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9933 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9935 if einfo.has_section(constants.INISECT_HYP):
9936 # use the export parameters but do not override the ones
9937 # specified by the user
9938 for name, value in einfo.items(constants.INISECT_HYP):
9939 if name not in self.op.hvparams:
9940 self.op.hvparams[name] = value
9942 if einfo.has_section(constants.INISECT_BEP):
9943 # use the parameters, without overriding
9944 for name, value in einfo.items(constants.INISECT_BEP):
9945 if name not in self.op.beparams:
9946 self.op.beparams[name] = value
9947 # Compatibility for the old "memory" be param
9948 if name == constants.BE_MEMORY:
9949 if constants.BE_MAXMEM not in self.op.beparams:
9950 self.op.beparams[constants.BE_MAXMEM] = value
9951 if constants.BE_MINMEM not in self.op.beparams:
9952 self.op.beparams[constants.BE_MINMEM] = value
9954 # try to read the parameters old style, from the main section
9955 for name in constants.BES_PARAMETERS:
9956 if (name not in self.op.beparams and
9957 einfo.has_option(constants.INISECT_INS, name)):
9958 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9960 if einfo.has_section(constants.INISECT_OSP):
9961 # use the parameters, without overriding
9962 for name, value in einfo.items(constants.INISECT_OSP):
9963 if name not in self.op.osparams:
9964 self.op.osparams[name] = value
9966 def _RevertToDefaults(self, cluster):
9967 """Revert the instance parameters to the default values.
9971 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9972 for name in self.op.hvparams.keys():
9973 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9974 del self.op.hvparams[name]
9976 be_defs = cluster.SimpleFillBE({})
9977 for name in self.op.beparams.keys():
9978 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9979 del self.op.beparams[name]
9981 nic_defs = cluster.SimpleFillNIC({})
9982 for nic in self.op.nics:
9983 for name in constants.NICS_PARAMETERS:
9984 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9987 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9988 for name in self.op.osparams.keys():
9989 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9990 del self.op.osparams[name]
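# Editorial note (illustration only): _RevertToDefaults strips every parameter
# whose value merely repeats the cluster default, so that only real overrides
# are stored with the instance.  The core pattern, for each parameter dict:
#
#   for name in params.keys():
#     if name in defaults and defaults[name] == params[name]:
#       del params[name]
#
# e.g. with defaults {"acpi": True} and params {"acpi": True, "pae": False},
# only {"pae": False} remains afterwards.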
9992 def _CalculateFileStorageDir(self):
9993 """Calculate final instance file storage dir.
9996 # file storage dir calculation/check
9997 self.instance_file_storage_dir = None
9998 if self.op.disk_template in constants.DTS_FILEBASED:
9999 # build the full file storage dir path
10002 if self.op.disk_template == constants.DT_SHARED_FILE:
10003 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10005 get_fsd_fn = self.cfg.GetFileStorageDir
10007 cfg_storagedir = get_fsd_fn()
10008 if not cfg_storagedir:
10009 raise errors.OpPrereqError("Cluster file storage dir not defined",
10010 errors.ECODE_STATE)
10011 joinargs.append(cfg_storagedir)
10013 if self.op.file_storage_dir is not None:
10014 joinargs.append(self.op.file_storage_dir)
10016 joinargs.append(self.op.instance_name)
10018 # pylint: disable=W0142
10019 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
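# Editorial note (illustration only, paths are hypothetical): the resulting
# directory is simply the cluster storage root, the optional per-instance
# subdirectory from the opcode and the instance name joined together, e.g.
#
#   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
#   # -> "/srv/ganeti/file-storage/web/inst1.example.com"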
10021 def CheckPrereq(self): # pylint: disable=R0914
10022 """Check prerequisites.
10025 self._CalculateFileStorageDir()
10027 if self.op.mode == constants.INSTANCE_IMPORT:
10028 export_info = self._ReadExportInfo()
10029 self._ReadExportParams(export_info)
10030 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10032 self._old_instance_name = None
10034 if (not self.cfg.GetVGName() and
10035 self.op.disk_template not in constants.DTS_NOT_LVM):
10036 raise errors.OpPrereqError("Cluster does not support lvm-based"
10037 " instances", errors.ECODE_STATE)
10039 if (self.op.hypervisor is None or
10040 self.op.hypervisor == constants.VALUE_AUTO):
10041 self.op.hypervisor = self.cfg.GetHypervisorType()
10043 cluster = self.cfg.GetClusterInfo()
10044 enabled_hvs = cluster.enabled_hypervisors
10045 if self.op.hypervisor not in enabled_hvs:
10046 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10048 (self.op.hypervisor, ",".join(enabled_hvs)),
10049 errors.ECODE_STATE)
10051 # Check tag validity
10052 for tag in self.op.tags:
10053 objects.TaggableObject.ValidateTag(tag)
10055 # check hypervisor parameter syntax (locally)
10056 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10057 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10059 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10060 hv_type.CheckParameterSyntax(filled_hvp)
10061 self.hv_full = filled_hvp
10062 # check that we don't specify global parameters on an instance
10063 _CheckGlobalHvParams(self.op.hvparams)
10065 # fill and remember the beparams dict
10066 self.be_full = _ComputeFullBeParams(self.op, cluster)
10068 # build os parameters
10069 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10071 # now that hvp/bep are in final format, let's reset to defaults,
10073 if self.op.identify_defaults:
10074 self._RevertToDefaults(cluster)
10077 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10080 # disk checks/pre-build
10081 default_vg = self.cfg.GetVGName()
10082 self.disks = _ComputeDisks(self.op, default_vg)
10084 if self.op.mode == constants.INSTANCE_IMPORT:
10086 for idx in range(len(self.disks)):
10087 option = "disk%d_dump" % idx
10088 if export_info.has_option(constants.INISECT_INS, option):
10089 # FIXME: are the old os-es, disk sizes, etc. useful?
10090 export_name = export_info.get(constants.INISECT_INS, option)
10091 image = utils.PathJoin(self.op.src_path, export_name)
10092 disk_images.append(image)
10094 disk_images.append(False)
10096 self.src_images = disk_images
10098 if self.op.instance_name == self._old_instance_name:
10099 for idx, nic in enumerate(self.nics):
10100 if nic.mac == constants.VALUE_AUTO:
10101 nic_mac_ini = "nic%d_mac" % idx
10102 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10104 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10106 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10107 if self.op.ip_check:
10108 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10109 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10110 (self.check_ip, self.op.instance_name),
10111 errors.ECODE_NOTUNIQUE)
10113 #### mac address generation
10114 # By generating here the mac address both the allocator and the hooks get
10115 # the real final mac address rather than the 'auto' or 'generate' value.
10116 # There is a race condition between the generation and the instance object
10117 # creation, which means that we know the mac is valid now, but we're not
10118 # sure it will be when we actually add the instance. If things go bad
10119 # adding the instance will abort because of a duplicate mac, and the
10120 # creation job will fail.
10121 for nic in self.nics:
10122 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10123 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10127 if self.op.iallocator is not None:
10128 self._RunAllocator()
10130 # Release all unneeded node locks
10131 _ReleaseLocks(self, locking.LEVEL_NODE,
10132 keep=filter(None, [self.op.pnode, self.op.snode,
10133 self.op.src_node]))
10134 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10135 keep=filter(None, [self.op.pnode, self.op.snode,
10136 self.op.src_node]))
10138 #### node related checks
10140 # check primary node
10141 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10142 assert self.pnode is not None, \
10143 "Cannot retrieve locked node %s" % self.op.pnode
10145 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10146 pnode.name, errors.ECODE_STATE)
10148 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10149 pnode.name, errors.ECODE_STATE)
10150 if not pnode.vm_capable:
10151 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10152 " '%s'" % pnode.name, errors.ECODE_STATE)
10154 self.secondaries = []
10156 # Fill in any IPs from IP pools. This must happen here, because we need to
10157 # know the nic's primary node, as specified by the iallocator
10158 for idx, nic in enumerate(self.nics):
10160 if net is not None:
10161 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10162 if netparams is None:
10163 raise errors.OpPrereqError("No netparams found for network"
10164 " %s. Probably not connected to"
10165 " node's %s nodegroup" %
10166 (net, self.pnode.name),
10167 errors.ECODE_INVAL)
10168 self.LogInfo("NIC/%d inherits netparams %s" %
10169 (idx, netparams.values()))
10170 nic.nicparams = dict(netparams)
10171 if nic.ip is not None:
10172 filled_params = cluster.SimpleFillNIC(nic.nicparams)
10173 if nic.ip.lower() == constants.NIC_IP_POOL:
10175 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10176 except errors.ReservationError:
10177 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10178 " from the address pool" % idx,
10179 errors.ECODE_STATE)
10180 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10183 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10184 except errors.ReservationError:
10185 raise errors.OpPrereqError("IP address %s already in use"
10186 " or does not belong to network %s" %
10188 errors.ECODE_NOTUNIQUE)
10190 # net is None, ip None or given
10191 if self.op.conflicts_check:
10192 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10195 # mirror node verification
10196 if self.op.disk_template in constants.DTS_INT_MIRROR:
10197 if self.op.snode == pnode.name:
10198 raise errors.OpPrereqError("The secondary node cannot be the"
10199 " primary node", errors.ECODE_INVAL)
10200 _CheckNodeOnline(self, self.op.snode)
10201 _CheckNodeNotDrained(self, self.op.snode)
10202 _CheckNodeVmCapable(self, self.op.snode)
10203 self.secondaries.append(self.op.snode)
10205 snode = self.cfg.GetNodeInfo(self.op.snode)
10206 if pnode.group != snode.group:
10207 self.LogWarning("The primary and secondary nodes are in two"
10208 " different node groups; the disk parameters"
10209 " from the first disk's node group will be"
10212 nodenames = [pnode.name] + self.secondaries
10214 # Verify instance specs
10215 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10217 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10218 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10219 constants.ISPEC_DISK_COUNT: len(self.disks),
10220 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10221 constants.ISPEC_NIC_COUNT: len(self.nics),
10222 constants.ISPEC_SPINDLE_USE: spindle_use,
10225 group_info = self.cfg.GetNodeGroup(pnode.group)
10226 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10227 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10228 if not self.op.ignore_ipolicy and res:
10229 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10230 (pnode.group, group_info.name, utils.CommaJoin(res)))
10231 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10233 if not self.adopt_disks:
10234 if self.op.disk_template == constants.DT_RBD:
10235 # _CheckRADOSFreeSpace() is just a placeholder.
10236 # Any function that checks prerequisites can be placed here.
10237 # Check if there is enough space on the RADOS cluster.
10238 _CheckRADOSFreeSpace()
10240 # Check lv size requirements, if not adopting
10241 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10242 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10244 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10245 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10246 disk[constants.IDISK_ADOPT])
10247 for disk in self.disks])
10248 if len(all_lvs) != len(self.disks):
10249 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10250 errors.ECODE_INVAL)
10251 for lv_name in all_lvs:
10253 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10254 # to ReserveLV use the same syntax
10255 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10256 except errors.ReservationError:
10257 raise errors.OpPrereqError("LV named %s used by another instance" %
10258 lv_name, errors.ECODE_NOTUNIQUE)
10260 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10261 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10263 node_lvs = self.rpc.call_lv_list([pnode.name],
10264 vg_names.payload.keys())[pnode.name]
10265 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10266 node_lvs = node_lvs.payload
10268 delta = all_lvs.difference(node_lvs.keys())
10270 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10271 utils.CommaJoin(delta),
10272 errors.ECODE_INVAL)
10273 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10275 raise errors.OpPrereqError("Online logical volumes found, cannot"
10276 " adopt: %s" % utils.CommaJoin(online_lvs),
10277 errors.ECODE_STATE)
10278 # update the size of disk based on what is found
10279 for dsk in self.disks:
10280 dsk[constants.IDISK_SIZE] = \
10281 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10282 dsk[constants.IDISK_ADOPT])][0]))
10284 elif self.op.disk_template == constants.DT_BLOCK:
10285 # Normalize and de-duplicate device paths
10286 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10287 for disk in self.disks])
10288 if len(all_disks) != len(self.disks):
10289 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10290 errors.ECODE_INVAL)
10291 baddisks = [d for d in all_disks
10292 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10294 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10295 " cannot be adopted" %
10296 (", ".join(baddisks),
10297 constants.ADOPTABLE_BLOCKDEV_ROOT),
10298 errors.ECODE_INVAL)
10300 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10301 list(all_disks))[pnode.name]
10302 node_disks.Raise("Cannot get block device information from node %s" %
10304 node_disks = node_disks.payload
10305 delta = all_disks.difference(node_disks.keys())
10307 raise errors.OpPrereqError("Missing block device(s): %s" %
10308 utils.CommaJoin(delta),
10309 errors.ECODE_INVAL)
10310 for dsk in self.disks:
10311 dsk[constants.IDISK_SIZE] = \
10312 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10314 # Verify instance specs
10315 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10317 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10318 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10319 constants.ISPEC_DISK_COUNT: len(self.disks),
10320 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10321 for disk in self.disks],
10322 constants.ISPEC_NIC_COUNT: len(self.nics),
10323 constants.ISPEC_SPINDLE_USE: spindle_use,
10326 group_info = self.cfg.GetNodeGroup(pnode.group)
10327 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10328 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10329 if not self.op.ignore_ipolicy and res:
10330 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10331 " policy: %s") % (pnode.group,
10332 utils.CommaJoin(res)),
10333 errors.ECODE_INVAL)
10335 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10337 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10338 # check OS parameters (remotely)
10339 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10341 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10343 # memory check on primary node
10344 #TODO(dynmem): use MINMEM for checking
10346 _CheckNodeFreeMemory(self, self.pnode.name,
10347 "creating instance %s" % self.op.instance_name,
10348 self.be_full[constants.BE_MAXMEM],
10349 self.op.hypervisor)
10351 self.dry_run_result = list(nodenames)
10353 def Exec(self, feedback_fn):
10354 """Create and add the instance to the cluster.
10357 instance = self.op.instance_name
10358 pnode_name = self.pnode.name
10360 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10361 self.owned_locks(locking.LEVEL_NODE)), \
10362 "Node locks differ from node resource locks"
10364 ht_kind = self.op.hypervisor
10365 if ht_kind in constants.HTS_REQ_PORT:
10366 network_port = self.cfg.AllocatePort()
10368 network_port = None
10370 # This is ugly, but we have a chicken-and-egg problem here:
10371 # We can only take the group disk parameters, as the instance
10372 # has no disks yet (we are generating them right here).
10373 node = self.cfg.GetNodeInfo(pnode_name)
10374 nodegroup = self.cfg.GetNodeGroup(node.group)
10375 disks = _GenerateDiskTemplate(self,
10376 self.op.disk_template,
10377 instance, pnode_name,
10380 self.instance_file_storage_dir,
10381 self.op.file_driver,
10384 self.cfg.GetGroupDiskParams(nodegroup))
10386 iobj = objects.Instance(name=instance, os=self.op.os_type,
10387 primary_node=pnode_name,
10388 nics=self.nics, disks=disks,
10389 disk_template=self.op.disk_template,
10390 admin_state=constants.ADMINST_DOWN,
10391 network_port=network_port,
10392 beparams=self.op.beparams,
10393 hvparams=self.op.hvparams,
10394 hypervisor=self.op.hypervisor,
10395 osparams=self.op.osparams,
10399 for tag in self.op.tags:
10402 if self.adopt_disks:
10403 if self.op.disk_template == constants.DT_PLAIN:
10404 # rename LVs to the newly-generated names; we need to construct
10405 # 'fake' LV disks with the old data, plus the new unique_id
10406 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10408 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10409 rename_to.append(t_dsk.logical_id)
10410 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10411 self.cfg.SetDiskID(t_dsk, pnode_name)
10412 result = self.rpc.call_blockdev_rename(pnode_name,
10413 zip(tmp_disks, rename_to))
10414 result.Raise("Failed to rename adopted LVs")
10416 feedback_fn("* creating instance disks...")
10418 _CreateDisks(self, iobj)
10419 except errors.OpExecError:
10420 self.LogWarning("Device creation failed, reverting...")
10422 _RemoveDisks(self, iobj)
10424 self.cfg.ReleaseDRBDMinors(instance)
10427 feedback_fn("adding instance %s to cluster config" % instance)
10429 self.cfg.AddInstance(iobj, self.proc.GetECId())
10431 # Declare that we don't want to remove the instance lock anymore, as we've
10432 # added the instance to the config
10433 del self.remove_locks[locking.LEVEL_INSTANCE]
10435 if self.op.mode == constants.INSTANCE_IMPORT:
10436 # Release unused nodes
10437 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10439 # Release all nodes
10440 _ReleaseLocks(self, locking.LEVEL_NODE)
10443 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10444 feedback_fn("* wiping instance disks...")
10446 _WipeDisks(self, iobj)
10447 except errors.OpExecError, err:
10448 logging.exception("Wiping disks failed")
10449 self.LogWarning("Wiping instance disks failed (%s)", err)
10453 # Something is already wrong with the disks, don't do anything else
10455 elif self.op.wait_for_sync:
10456 disk_abort = not _WaitForSync(self, iobj)
10457 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10458 # make sure the disks are not degraded (still sync-ing is ok)
10459 feedback_fn("* checking mirrors status")
10460 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10465 _RemoveDisks(self, iobj)
10466 self.cfg.RemoveInstance(iobj.name)
10467 # Make sure the instance lock gets removed
10468 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10469 raise errors.OpExecError("There are some degraded disks for"
10472 # Release all node resource locks
10473 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10475 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10476 # we need to set the disk IDs to the primary node, since the
10477 # preceding code might or might not have done it, depending on
10478 # disk template and other options
10479 for disk in iobj.disks:
10480 self.cfg.SetDiskID(disk, pnode_name)
10481 if self.op.mode == constants.INSTANCE_CREATE:
10482 if not self.op.no_install:
10483 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10484 not self.op.wait_for_sync)
10486 feedback_fn("* pausing disk sync to install instance OS")
10487 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10490 for idx, success in enumerate(result.payload):
10492 logging.warn("pause-sync of instance %s for disk %d failed",
10495 feedback_fn("* running the instance OS create scripts...")
10496 # FIXME: pass debug option from opcode to backend
10498 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10499 self.op.debug_level)
10501 feedback_fn("* resuming disk sync")
10502 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10505 for idx, success in enumerate(result.payload):
10507 logging.warn("resume-sync of instance %s for disk %d failed",
10510 os_add_result.Raise("Could not add os for instance %s"
10511 " on node %s" % (instance, pnode_name))
10514 if self.op.mode == constants.INSTANCE_IMPORT:
10515 feedback_fn("* running the instance OS import scripts...")
10519 for idx, image in enumerate(self.src_images):
10523 # FIXME: pass debug option from opcode to backend
10524 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10525 constants.IEIO_FILE, (image, ),
10526 constants.IEIO_SCRIPT,
10527 (iobj.disks[idx], idx),
10529 transfers.append(dt)
10532 masterd.instance.TransferInstanceData(self, feedback_fn,
10533 self.op.src_node, pnode_name,
10534 self.pnode.secondary_ip,
10536 if not compat.all(import_result):
10537 self.LogWarning("Some disks for instance %s on node %s were not"
10538 " imported successfully" % (instance, pnode_name))
10540 rename_from = self._old_instance_name
10542 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10543 feedback_fn("* preparing remote import...")
10544 # The source cluster will stop the instance before attempting to make
10545 # a connection. In some cases stopping an instance can take a long
10546 # time, hence the shutdown timeout is added to the connection
10548 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10549 self.op.source_shutdown_timeout)
10550 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10552 assert iobj.primary_node == self.pnode.name
10554 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10555 self.source_x509_ca,
10556 self._cds, timeouts)
10557 if not compat.all(disk_results):
10558 # TODO: Should the instance still be started, even if some disks
10559 # failed to import (valid for local imports, too)?
10560 self.LogWarning("Some disks for instance %s on node %s were not"
10561 " imported successfully" % (instance, pnode_name))
10563 rename_from = self.source_instance_name
10566 # also checked in the prereq part
10567 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10570 # Run rename script on newly imported instance
10571 assert iobj.name == instance
10572 feedback_fn("Running rename script for %s" % instance)
10573 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10575 self.op.debug_level)
10576 if result.fail_msg:
10577 self.LogWarning("Failed to run rename script for %s on node"
10578 " %s: %s" % (instance, pnode_name, result.fail_msg))
10580 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10583 iobj.admin_state = constants.ADMINST_UP
10584 self.cfg.Update(iobj, feedback_fn)
10585 logging.info("Starting instance %s on node %s", instance, pnode_name)
10586 feedback_fn("* starting instance...")
10587 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10589 result.Raise("Could not start instance")
10591 return list(iobj.all_nodes)
10594 class LUInstanceMultiAlloc(NoHooksLU):
10595 """Allocates multiple instances at the same time.
10600 def CheckArguments(self):
10601 """Check arguments.
10605 for inst in self.op.instances:
10606 if inst.iallocator is not None:
10607 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10608 " instance objects", errors.ECODE_INVAL)
10609 nodes.append(bool(inst.pnode))
10610 if inst.disk_template in constants.DTS_INT_MIRROR:
10611 nodes.append(bool(inst.snode))
10613 has_nodes = compat.any(nodes)
10614 if compat.all(nodes) ^ has_nodes:
10615 raise errors.OpPrereqError("There are instance objects providing"
10616 " pnode/snode while others do not",
10617 errors.ECODE_INVAL)
10619 if self.op.iallocator is None:
10620 default_iallocator = self.cfg.GetDefaultIAllocator()
10621 if default_iallocator and has_nodes:
10622 self.op.iallocator = default_iallocator
10624 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10625 " given and no cluster-wide default"
10626 " iallocator found; please specify either"
10627 " an iallocator or nodes on the instances"
10628 " or set a cluster-wide default iallocator",
10629 errors.ECODE_INVAL)
10631 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10633 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10634 utils.CommaJoin(dups), errors.ECODE_INVAL)
10636 def ExpandNames(self):
10637 """Calculate the locks.
10640 self.share_locks = _ShareAll()
10641 self.needed_locks = {}
10643 if self.op.iallocator:
10644 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10645 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10648 for inst in self.op.instances:
10649 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10650 nodeslist.append(inst.pnode)
10651 if inst.snode is not None:
10652 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10653 nodeslist.append(inst.snode)
10655 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10656 # Lock resources of instance's primary and secondary nodes (copy to
10657 # prevent accidental modification)
10658 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10660 def CheckPrereq(self):
10661 """Check prerequisite.
10664 cluster = self.cfg.GetClusterInfo()
10665 default_vg = self.cfg.GetVGName()
10666 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10667 _ComputeNics(op, cluster, None,
10668 self.cfg, self.proc),
10669 _ComputeFullBeParams(op, cluster))
10670 for op in self.op.instances]
10671 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10672 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10674 ial.Run(self.op.iallocator)
10676 if not ial.success:
10677 raise errors.OpPrereqError("Can't compute nodes using"
10678 " iallocator '%s': %s" %
10679 (self.op.iallocator, ial.info),
10680 errors.ECODE_NORES)
10682 self.ia_result = ial.result
10684 if self.op.dry_run:
10685 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10686 constants.JOB_IDS_KEY: [],
10689 def _ConstructPartialResult(self):
10690 """Constructs the partial result.
10693 (allocatable, failed) = self.ia_result
10695 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10696 map(compat.fst, allocatable),
10697 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10700 def Exec(self, feedback_fn):
10701 """Executes the opcode.
10704 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10705 (allocatable, failed) = self.ia_result
10708 for (name, nodes) in allocatable:
10709 op = op2inst.pop(name)
10712 (op.pnode, op.snode) = nodes
10714 (op.pnode,) = nodes
10718 missing = set(op2inst.keys()) - set(failed)
10719 assert not missing, \
10720 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10722 return ResultWithJobs(jobs, **self._ConstructPartialResult())
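# Editorial note (illustration only): the value returned above combines the
# submitted per-instance creation jobs with the partial result dict, so callers
# see both the job IDs and the instances the iallocator could not place.  With
# hypothetical instance names the opcode result ends up looking roughly like:
#
#   {constants.JOB_IDS_KEY: [[1234], [1235]],
#    opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#    opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"]}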
10725 def _CheckRADOSFreeSpace():
10726 """Compute disk size requirements inside the RADOS cluster.
10729 # For the RADOS cluster we assume there is always enough space.
10733 class LUInstanceConsole(NoHooksLU):
10734 """Connect to an instance's console.
10736 This is somewhat special in that it returns the command line that
10737 you need to run on the master node in order to connect to the
10743 def ExpandNames(self):
10744 self.share_locks = _ShareAll()
10745 self._ExpandAndLockInstance()
10747 def CheckPrereq(self):
10748 """Check prerequisites.
10750 This checks that the instance is in the cluster.
10753 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10754 assert self.instance is not None, \
10755 "Cannot retrieve locked instance %s" % self.op.instance_name
10756 _CheckNodeOnline(self, self.instance.primary_node)
10758 def Exec(self, feedback_fn):
10759 """Connect to the console of an instance
10762 instance = self.instance
10763 node = instance.primary_node
10765 node_insts = self.rpc.call_instance_list([node],
10766 [instance.hypervisor])[node]
10767 node_insts.Raise("Can't get node information from %s" % node)
10769 if instance.name not in node_insts.payload:
10770 if instance.admin_state == constants.ADMINST_UP:
10771 state = constants.INSTST_ERRORDOWN
10772 elif instance.admin_state == constants.ADMINST_DOWN:
10773 state = constants.INSTST_ADMINDOWN
10775 state = constants.INSTST_ADMINOFFLINE
10776 raise errors.OpExecError("Instance %s is not running (state %s)" %
10777 (instance.name, state))
10779 logging.debug("Connecting to console of %s on %s", instance.name, node)
10781 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10784 def _GetInstanceConsole(cluster, instance):
10785 """Returns console information for an instance.
10787 @type cluster: L{objects.Cluster}
10788 @type instance: L{objects.Instance}
10792 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10793 # beparams and hvparams are passed separately, to avoid editing the
10794 # instance and then saving the defaults in the instance itself.
10795 hvparams = cluster.FillHV(instance)
10796 beparams = cluster.FillBE(instance)
10797 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10799 assert console.instance == instance.name
10800 assert console.Validate()
10802 return console.ToDict()
10805 class LUInstanceReplaceDisks(LogicalUnit):
10806 """Replace the disks of an instance.
10809 HPATH = "mirrors-replace"
10810 HTYPE = constants.HTYPE_INSTANCE
10813 def CheckArguments(self):
10814 """Check arguments.
10817 remote_node = self.op.remote_node
10818 ialloc = self.op.iallocator
10819 if self.op.mode == constants.REPLACE_DISK_CHG:
10820 if remote_node is None and ialloc is None:
10821 raise errors.OpPrereqError("When changing the secondary either an"
10822 " iallocator script must be used or the"
10823 " new node given", errors.ECODE_INVAL)
10825 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10827 elif remote_node is not None or ialloc is not None:
10828 # Not replacing the secondary
10829 raise errors.OpPrereqError("The iallocator and new node options can"
10830 " only be used when changing the"
10831 " secondary node", errors.ECODE_INVAL)
10833 def ExpandNames(self):
10834 self._ExpandAndLockInstance()
10836 assert locking.LEVEL_NODE not in self.needed_locks
10837 assert locking.LEVEL_NODE_RES not in self.needed_locks
10838 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10840 assert self.op.iallocator is None or self.op.remote_node is None, \
10841 "Conflicting options"
10843 if self.op.remote_node is not None:
10844 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10846 # Warning: do not remove the locking of the new secondary here
10847 # unless DRBD8.AddChildren is changed to work in parallel;
10848 # currently it doesn't since parallel invocations of
10849 # FindUnusedMinor will conflict
10850 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10851 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10853 self.needed_locks[locking.LEVEL_NODE] = []
10854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10856 if self.op.iallocator is not None:
10857 # iallocator will select a new node in the same group
10858 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10860 self.needed_locks[locking.LEVEL_NODE_RES] = []
10862 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10863 self.op.iallocator, self.op.remote_node,
10864 self.op.disks, False, self.op.early_release,
10865 self.op.ignore_ipolicy)
10867 self.tasklets = [self.replacer]
10869 def DeclareLocks(self, level):
10870 if level == locking.LEVEL_NODEGROUP:
10871 assert self.op.remote_node is None
10872 assert self.op.iallocator is not None
10873 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10875 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10876 # Lock all groups used by instance optimistically; this requires going
10877 # via the node before it's locked, requiring verification later on
10878 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10879 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10881 elif level == locking.LEVEL_NODE:
10882 if self.op.iallocator is not None:
10883 assert self.op.remote_node is None
10884 assert not self.needed_locks[locking.LEVEL_NODE]
10886 # Lock member nodes of all locked groups
10887 self.needed_locks[locking.LEVEL_NODE] = \
10889 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10890 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10892 self._LockInstancesNodes()
10893 elif level == locking.LEVEL_NODE_RES:
10895 self.needed_locks[locking.LEVEL_NODE_RES] = \
10896 self.needed_locks[locking.LEVEL_NODE]
10898 def BuildHooksEnv(self):
10899 """Build hooks env.
10901 This runs on the master, the primary and all the secondaries.
10904 instance = self.replacer.instance
10906 "MODE": self.op.mode,
10907 "NEW_SECONDARY": self.op.remote_node,
10908 "OLD_SECONDARY": instance.secondary_nodes[0],
10910 env.update(_BuildInstanceHookEnvByObject(self, instance))
10913 def BuildHooksNodes(self):
10914 """Build hooks nodes.
10917 instance = self.replacer.instance
10919 self.cfg.GetMasterNode(),
10920 instance.primary_node,
10922 if self.op.remote_node is not None:
10923 nl.append(self.op.remote_node)
10926 def CheckPrereq(self):
10927 """Check prerequisites.
10930 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10931 self.op.iallocator is None)
10933 # Verify if node group locks are still correct
10934 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10936 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10938 return LogicalUnit.CheckPrereq(self)
10941 class TLReplaceDisks(Tasklet):
10942 """Replaces disks for an instance.
10944 Note: Locking is not within the scope of this class.
10947 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10948 disks, delay_iallocator, early_release, ignore_ipolicy):
10949 """Initializes this class.
10952 Tasklet.__init__(self, lu)
10955 self.instance_name = instance_name
10957 self.iallocator_name = iallocator_name
10958 self.remote_node = remote_node
10960 self.delay_iallocator = delay_iallocator
10961 self.early_release = early_release
10962 self.ignore_ipolicy = ignore_ipolicy
10965 self.instance = None
10966 self.new_node = None
10967 self.target_node = None
10968 self.other_node = None
10969 self.remote_node_info = None
10970 self.node_secondary_ip = None
10973 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10974 """Compute a new secondary node using an IAllocator.
10977 req = iallocator.IAReqRelocate(name=instance_name,
10978 relocate_from=list(relocate_from))
10979 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10981 ial.Run(iallocator_name)
10983 if not ial.success:
10984 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10985 " %s" % (iallocator_name, ial.info),
10986 errors.ECODE_NORES)
10988 remote_node_name = ial.result[0]
10990 lu.LogInfo("Selected new secondary for instance '%s': %s",
10991 instance_name, remote_node_name)
10993 return remote_node_name
10995 def _FindFaultyDisks(self, node_name):
10996 """Wrapper for L{_FindFaultyInstanceDisks}.
10999 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11002 def _CheckDisksActivated(self, instance):
11003 """Checks if the instance disks are activated.
11005 @param instance: The instance to check disks
11006 @return: True if they are activated, False otherwise
11009 nodes = instance.all_nodes
11011 for idx, dev in enumerate(instance.disks):
11013 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11014 self.cfg.SetDiskID(dev, node)
11016 result = _BlockdevFind(self, node, dev, instance)
11020 elif result.fail_msg or not result.payload:
11025 def CheckPrereq(self):
11026 """Check prerequisites.
11028 This checks that the instance is in the cluster.
11031 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11032 assert instance is not None, \
11033 "Cannot retrieve locked instance %s" % self.instance_name
11035 if instance.disk_template != constants.DT_DRBD8:
11036 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11037 " instances", errors.ECODE_INVAL)
11039 if len(instance.secondary_nodes) != 1:
11040 raise errors.OpPrereqError("The instance has a strange layout,"
11041 " expected one secondary but found %d" %
11042 len(instance.secondary_nodes),
11043 errors.ECODE_FAULT)
11045 if not self.delay_iallocator:
11046 self._CheckPrereq2()
11048 def _CheckPrereq2(self):
11049 """Check prerequisites, second part.
11051 This function should always be part of CheckPrereq. It was separated and is
11052 now called from Exec because during node evacuation iallocator was only
11053     called with an unmodified cluster model, not taking planned changes into
11054     account.
11057 instance = self.instance
11058 secondary_node = instance.secondary_nodes[0]
11060 if self.iallocator_name is None:
11061 remote_node = self.remote_node
11063 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11064 instance.name, instance.secondary_nodes)
11066 if remote_node is None:
11067 self.remote_node_info = None
11069 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11070 "Remote node '%s' is not locked" % remote_node
11072 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11073 assert self.remote_node_info is not None, \
11074 "Cannot retrieve locked node %s" % remote_node
11076 if remote_node == self.instance.primary_node:
11077 raise errors.OpPrereqError("The specified node is the primary node of"
11078 " the instance", errors.ECODE_INVAL)
11080 if remote_node == secondary_node:
11081 raise errors.OpPrereqError("The specified node is already the"
11082 " secondary node of the instance",
11083 errors.ECODE_INVAL)
11085 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11086 constants.REPLACE_DISK_CHG):
11087 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11088 errors.ECODE_INVAL)
11090 if self.mode == constants.REPLACE_DISK_AUTO:
11091 if not self._CheckDisksActivated(instance):
11092 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11093 " first" % self.instance_name,
11094 errors.ECODE_STATE)
11095 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11096 faulty_secondary = self._FindFaultyDisks(secondary_node)
11098 if faulty_primary and faulty_secondary:
11099 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11100 " one node and can not be repaired"
11101 " automatically" % self.instance_name,
11102 errors.ECODE_STATE)
11104       if faulty_primary:
11105         self.disks = faulty_primary
11106 self.target_node = instance.primary_node
11107 self.other_node = secondary_node
11108 check_nodes = [self.target_node, self.other_node]
11109 elif faulty_secondary:
11110 self.disks = faulty_secondary
11111 self.target_node = secondary_node
11112 self.other_node = instance.primary_node
11113 check_nodes = [self.target_node, self.other_node]
11119 # Non-automatic modes
11120 if self.mode == constants.REPLACE_DISK_PRI:
11121 self.target_node = instance.primary_node
11122 self.other_node = secondary_node
11123 check_nodes = [self.target_node, self.other_node]
11125 elif self.mode == constants.REPLACE_DISK_SEC:
11126 self.target_node = secondary_node
11127 self.other_node = instance.primary_node
11128 check_nodes = [self.target_node, self.other_node]
11130 elif self.mode == constants.REPLACE_DISK_CHG:
11131 self.new_node = remote_node
11132 self.other_node = instance.primary_node
11133 self.target_node = secondary_node
11134 check_nodes = [self.new_node, self.other_node]
11136 _CheckNodeNotDrained(self.lu, remote_node)
11137 _CheckNodeVmCapable(self.lu, remote_node)
11139 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11140 assert old_node_info is not None
11141 if old_node_info.offline and not self.early_release:
11142 # doesn't make sense to delay the release
11143 self.early_release = True
11144 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11145 " early-release mode", secondary_node)
11148       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11149                                    self.mode)
11151 # If not specified all disks should be replaced
11153 self.disks = range(len(self.instance.disks))
11155 # TODO: This is ugly, but right now we can't distinguish between internal
11156 # submitted opcode and external one. We should fix that.
11157 if self.remote_node_info:
11158 # We change the node, lets verify it still meets instance policy
11159 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11160 cluster = self.cfg.GetClusterInfo()
11161 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11163 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11164 ignore=self.ignore_ipolicy)
11166 for node in check_nodes:
11167 _CheckNodeOnline(self.lu, node)
11169 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11172 if node_name is not None)
11174 # Release unneeded node and node resource locks
11175 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11176 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11178 # Release any owned node group
11179 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11180 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11182 # Check whether disks are valid
11183 for disk_idx in self.disks:
11184 instance.FindDisk(disk_idx)
11186 # Get secondary node IP addresses
11187 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11188 in self.cfg.GetMultiNodeInfo(touched_nodes))
11190 def Exec(self, feedback_fn):
11191 """Execute disk replacement.
11193 This dispatches the disk replacement to the appropriate handler.
11196 if self.delay_iallocator:
11197 self._CheckPrereq2()
11200 # Verify owned locks before starting operation
11201 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11202 assert set(owned_nodes) == set(self.node_secondary_ip), \
11203 ("Incorrect node locks, owning %s, expected %s" %
11204 (owned_nodes, self.node_secondary_ip.keys()))
11205 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11206 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11208 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11209 assert list(owned_instances) == [self.instance_name], \
11210 "Instance '%s' not locked" % self.instance_name
11212 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11213 "Should not own any node group lock at this point"
11216 feedback_fn("No disks need replacement for instance '%s'" %
11217 self.instance.name)
11220 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11221 (utils.CommaJoin(self.disks), self.instance.name))
11222       feedback_fn("Current primary node: %s" % self.instance.primary_node)
11223       feedback_fn("Current secondary node: %s" %
11224 utils.CommaJoin(self.instance.secondary_nodes))
11226 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11228 # Activate the instance disks if we're replacing them on a down instance
11230 _StartInstanceDisks(self.lu, self.instance, True)
11233 # Should we replace the secondary node?
11234 if self.new_node is not None:
11235 fn = self._ExecDrbd8Secondary
11237 fn = self._ExecDrbd8DiskOnly
11239 result = fn(feedback_fn)
11241 # Deactivate the instance disks if we're replacing them on a
11244 _SafeShutdownInstanceDisks(self.lu, self.instance)
11246 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11249 # Verify owned locks
11250 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11251 nodes = frozenset(self.node_secondary_ip)
11252 assert ((self.early_release and not owned_nodes) or
11253 (not self.early_release and not (set(owned_nodes) - nodes))), \
11254 ("Not owning the correct locks, early_release=%s, owned=%r,"
11255 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11259 def _CheckVolumeGroup(self, nodes):
11260 self.lu.LogInfo("Checking volume groups")
11262 vgname = self.cfg.GetVGName()
11264 # Make sure volume group exists on all involved nodes
11265 results = self.rpc.call_vg_list(nodes)
11267 raise errors.OpExecError("Can't list volume groups on the nodes")
11270 res = results[node]
11271 res.Raise("Error checking node %s" % node)
11272 if vgname not in res.payload:
11273           raise errors.OpExecError("Volume group '%s' not found on node %s" %
11274                                    (vgname, node))
11276 def _CheckDisksExistence(self, nodes):
11277 # Check disk existence
11278 for idx, dev in enumerate(self.instance.disks):
11279 if idx not in self.disks:
11283 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11284 self.cfg.SetDiskID(dev, node)
11286 result = _BlockdevFind(self, node, dev, self.instance)
11288 msg = result.fail_msg
11289 if msg or not result.payload:
11291 msg = "disk not found"
11292           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11293                                    (idx, node, msg))
11295 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11296 for idx, dev in enumerate(self.instance.disks):
11297 if idx not in self.disks:
11300         self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11301                         (idx, node_name))
11303 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11304 on_primary, ldisk=ldisk):
11305 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11306 " replace disks for instance %s" %
11307 (node_name, self.instance.name))
11309 def _CreateNewStorage(self, node_name):
11310 """Create new storage on the primary or secondary node.
11312 This is only used for same-node replaces, not for changing the
11313 secondary node, hence we don't want to modify the existing disk.
11318 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11319 for idx, dev in enumerate(disks):
11320 if idx not in self.disks:
11323 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11325 self.cfg.SetDiskID(dev, node_name)
11327 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11328 names = _GenerateUniqueNames(self.lu, lv_names)
11330 (data_disk, meta_disk) = dev.children
11331 vg_data = data_disk.logical_id[0]
11332 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11333 logical_id=(vg_data, names[0]),
11334 params=data_disk.params)
11335 vg_meta = meta_disk.logical_id[0]
11336 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11337 size=constants.DRBD_META_SIZE,
11338 logical_id=(vg_meta, names[1]),
11339 params=meta_disk.params)
11341 new_lvs = [lv_data, lv_meta]
11342 old_lvs = [child.Copy() for child in dev.children]
11343 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11345 # we pass force_create=True to force the LVM creation
11346 for new_lv in new_lvs:
11347 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11348 _GetInstanceInfoText(self.instance), False)
11352 def _CheckDevices(self, node_name, iv_names):
11353 for name, (dev, _, _) in iv_names.iteritems():
11354 self.cfg.SetDiskID(dev, node_name)
11356 result = _BlockdevFind(self, node_name, dev, self.instance)
11358 msg = result.fail_msg
11359 if msg or not result.payload:
11361 msg = "disk not found"
11362         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11363                                  (name, msg))
11365 if result.payload.is_degraded:
11366 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11368 def _RemoveOldStorage(self, node_name, iv_names):
11369 for name, (_, old_lvs, _) in iv_names.iteritems():
11370 self.lu.LogInfo("Remove logical volumes for %s" % name)
11373 self.cfg.SetDiskID(lv, node_name)
11375 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11377 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11378 hint="remove unused LVs manually")
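  # Illustrative note (not part of the original module): the "iv_names"
  # mapping that _CreateNewStorage builds and _CheckDevices/_RemoveOldStorage
  # consume has, per instance disk, roughly the shape
  #
  #   iv_names["disk/0"] = (drbd_disk_object,
  #                         [old_data_lv, old_meta_lv],   # LVs removed later
  #                         [new_data_lv, new_meta_lv])   # freshly created LVs
  #
  # The key and the variable names in this example are invented for clarity.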
11380 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11381 """Replace a disk on the primary or secondary for DRBD 8.
11383 The algorithm for replace is quite complicated:
11385 1. for each disk to be replaced:
11387 1. create new LVs on the target node with unique names
11388 1. detach old LVs from the drbd device
11389 1. rename old LVs to name_replaced.<time_t>
11390 1. rename new LVs to old LVs
11391 1. attach the new LVs (with the old names now) to the drbd device
11393 1. wait for sync across all devices
11395 1. for each modified disk:
11397     1. remove old LVs (which have the name name_replaced.<time_t>)
11399 Failures are not very well handled.
11404 # Step: check device activation
11405 self.lu.LogStep(1, steps_total, "Check device existence")
11406 self._CheckDisksExistence([self.other_node, self.target_node])
11407 self._CheckVolumeGroup([self.target_node, self.other_node])
11409 # Step: check other node consistency
11410 self.lu.LogStep(2, steps_total, "Check peer consistency")
11411 self._CheckDisksConsistency(self.other_node,
11412 self.other_node == self.instance.primary_node,
11415 # Step: create new storage
11416 self.lu.LogStep(3, steps_total, "Allocate new storage")
11417 iv_names = self._CreateNewStorage(self.target_node)
11419 # Step: for each lv, detach+rename*2+attach
11420 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11421 for dev, old_lvs, new_lvs in iv_names.itervalues():
11422 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11424 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11426 result.Raise("Can't detach drbd from local storage on node"
11427 " %s for device %s" % (self.target_node, dev.iv_name))
11429 #cfg.Update(instance)
11431 # ok, we created the new LVs, so now we know we have the needed
11432 # storage; as such, we proceed on the target node to rename
11433 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11434 # using the assumption that logical_id == physical_id (which in
11435 # turn is the unique_id on that node)
11437 # FIXME(iustin): use a better name for the replaced LVs
11438 temp_suffix = int(time.time())
11439 ren_fn = lambda d, suff: (d.physical_id[0],
11440 d.physical_id[1] + "_replaced-%s" % suff)
11442 # Build the rename list based on what LVs exist on the node
11443 rename_old_to_new = []
11444 for to_ren in old_lvs:
11445 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11446 if not result.fail_msg and result.payload:
11448 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11450 self.lu.LogInfo("Renaming the old LVs on the target node")
11451       result = self.rpc.call_blockdev_rename(self.target_node,
11452                                              rename_old_to_new)
11453 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11455 # Now we rename the new LVs to the old LVs
11456 self.lu.LogInfo("Renaming the new LVs on the target node")
11457 rename_new_to_old = [(new, old.physical_id)
11458 for old, new in zip(old_lvs, new_lvs)]
11459       result = self.rpc.call_blockdev_rename(self.target_node,
11460                                              rename_new_to_old)
11461 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11463 # Intermediate steps of in memory modifications
11464 for old, new in zip(old_lvs, new_lvs):
11465 new.logical_id = old.logical_id
11466 self.cfg.SetDiskID(new, self.target_node)
11468 # We need to modify old_lvs so that removal later removes the
11469 # right LVs, not the newly added ones; note that old_lvs is a
11471 for disk in old_lvs:
11472 disk.logical_id = ren_fn(disk, temp_suffix)
11473 self.cfg.SetDiskID(disk, self.target_node)
11475 # Now that the new lvs have the old name, we can add them to the device
11476 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11477 result = self.rpc.call_blockdev_addchildren(self.target_node,
11478 (dev, self.instance), new_lvs)
11479 msg = result.fail_msg
11481 for new_lv in new_lvs:
11482           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11483                                                new_lv).fail_msg
11484           if msg2:
11485 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11486                                hint=("cleanup manually the unused logical"
11487                                      " volumes"))
11488 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11490 cstep = itertools.count(5)
11492 if self.early_release:
11493 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11494 self._RemoveOldStorage(self.target_node, iv_names)
11495 # TODO: Check if releasing locks early still makes sense
11496 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11498 # Release all resource locks except those used by the instance
11499 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11500 keep=self.node_secondary_ip.keys())
11502 # Release all node locks while waiting for sync
11503 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11505 # TODO: Can the instance lock be downgraded here? Take the optional disk
11506 # shutdown in the caller into consideration.
11509 # This can fail as the old devices are degraded and _WaitForSync
11510 # does a combined result over all disks, so we don't check its return value
11511 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11512 _WaitForSync(self.lu, self.instance)
11514 # Check all devices manually
11515 self._CheckDevices(self.instance.primary_node, iv_names)
11517 # Step: remove old storage
11518 if not self.early_release:
11519 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11520 self._RemoveOldStorage(self.target_node, iv_names)
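  # Illustrative summary (comment only, nothing here is executed): the
  # per-disk LV swap performed by _ExecDrbd8DiskOnly boils down to
  #
  #   detach old LVs from the DRBD device
  #   rename old LVs  ->  <lv name>_replaced-<timestamp>
  #   rename new LVs  ->  the old LV names
  #   re-attach the renamed new LVs to the DRBD device
  #   wait for resync, then remove the *_replaced-<timestamp> LVs
  #
  # so the DRBD device keeps its name while its backing storage is replaced
  # underneath it.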
11522 def _ExecDrbd8Secondary(self, feedback_fn):
11523 """Replace the secondary node for DRBD 8.
11525 The algorithm for replace is quite complicated:
11526 - for all disks of the instance:
11527 - create new LVs on the new node with same names
11528 - shutdown the drbd device on the old secondary
11529 - disconnect the drbd network on the primary
11530 - create the drbd device on the new secondary
11531 - network attach the drbd on the primary, using an artifice:
11532 the drbd code for Attach() will connect to the network if it
11533 finds a device which is connected to the good local disks but
11534 not network enabled
11535 - wait for sync across all devices
11536 - remove all disks from the old secondary
11538 Failures are not very well handled.
11543 pnode = self.instance.primary_node
11545 # Step: check device activation
11546 self.lu.LogStep(1, steps_total, "Check device existence")
11547 self._CheckDisksExistence([self.instance.primary_node])
11548 self._CheckVolumeGroup([self.instance.primary_node])
11550 # Step: check other node consistency
11551 self.lu.LogStep(2, steps_total, "Check peer consistency")
11552 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11554 # Step: create new storage
11555 self.lu.LogStep(3, steps_total, "Allocate new storage")
11556 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11557 for idx, dev in enumerate(disks):
11558 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11559 (self.new_node, idx))
11560 # we pass force_create=True to force LVM creation
11561 for new_lv in dev.children:
11562 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11563 True, _GetInstanceInfoText(self.instance), False)
11565     # Step 4: drbd minors and drbd setup changes
11566 # after this, we must manually remove the drbd minors on both the
11567 # error and the success paths
11568 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11569 minors = self.cfg.AllocateDRBDMinor([self.new_node
11570 for dev in self.instance.disks],
11571 self.instance.name)
11572 logging.debug("Allocated minors %r", minors)
11575 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11576 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11577 (self.new_node, idx))
11578 # create new devices on new_node; note that we create two IDs:
11579 # one without port, so the drbd will be activated without
11580 # networking information on the new node at this stage, and one
11581 # with network, for the latter activation in step 4
11582 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11583         if self.instance.primary_node == o_node1:
11584           p_minor = o_minor1
11585         else:
11586           assert self.instance.primary_node == o_node2, "Three-node instance?"
11587           p_minor = o_minor2
11589 new_alone_id = (self.instance.primary_node, self.new_node, None,
11590 p_minor, new_minor, o_secret)
11591 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11592 p_minor, new_minor, o_secret)
11594 iv_names[idx] = (dev, dev.children, new_net_id)
11595 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11597 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11598 logical_id=new_alone_id,
11599                                 children=dev.children,
11600                                 size=dev.size,
11601                                 params={})
11602         (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11603                                                self.cfg)
11604         try:
11605           _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11606                                 anno_new_drbd,
11607 _GetInstanceInfoText(self.instance), False)
11608 except errors.GenericError:
11609           self.cfg.ReleaseDRBDMinors(self.instance.name)
11610           raise
11612 # We have new devices, shutdown the drbd on the old secondary
11613 for idx, dev in enumerate(self.instance.disks):
11614 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11615 self.cfg.SetDiskID(dev, self.target_node)
11616 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11617 (dev, self.instance)).fail_msg
11619 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11620                            " node: %s" % (idx, msg),
11621 hint=("Please cleanup this device manually as"
11622 " soon as possible"))
11624 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11625 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11626 self.instance.disks)[pnode]
11628 msg = result.fail_msg
11630 # detaches didn't succeed (unlikely)
11631 self.cfg.ReleaseDRBDMinors(self.instance.name)
11632 raise errors.OpExecError("Can't detach the disks from the network on"
11633 " old node: %s" % (msg,))
11635 # if we managed to detach at least one, we update all the disks of
11636 # the instance to point to the new secondary
11637 self.lu.LogInfo("Updating instance configuration")
11638 for dev, _, new_logical_id in iv_names.itervalues():
11639 dev.logical_id = new_logical_id
11640 self.cfg.SetDiskID(dev, self.instance.primary_node)
11642 self.cfg.Update(self.instance, feedback_fn)
11644 # Release all node locks (the configuration has been updated)
11645 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11647 # and now perform the drbd attach
11648 self.lu.LogInfo("Attaching primary drbds to new secondary"
11649 " (standalone => connected)")
11650 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11652 self.node_secondary_ip,
11653 (self.instance.disks, self.instance),
11654 self.instance.name,
11656 for to_node, to_result in result.items():
11657 msg = to_result.fail_msg
11659 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11661 hint=("please do a gnt-instance info to see the"
11662 " status of disks"))
11664 cstep = itertools.count(5)
11666 if self.early_release:
11667 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11668 self._RemoveOldStorage(self.target_node, iv_names)
11669 # TODO: Check if releasing locks early still makes sense
11670 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11672 # Release all resource locks except those used by the instance
11673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11674 keep=self.node_secondary_ip.keys())
11676 # TODO: Can the instance lock be downgraded here? Take the optional disk
11677 # shutdown in the caller into consideration.
11680 # This can fail as the old devices are degraded and _WaitForSync
11681 # does a combined result over all disks, so we don't check its return value
11682 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11683 _WaitForSync(self.lu, self.instance)
11685 # Check all devices manually
11686 self._CheckDevices(self.instance.primary_node, iv_names)
11688 # Step: remove old storage
11689 if not self.early_release:
11690 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11691 self._RemoveOldStorage(self.target_node, iv_names)
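# Illustrative sketch only: the owning LogicalUnit is expected to wire the
# tasklet above in roughly the following way (the attribute names below are
# an assumption based on TLReplaceDisks.__init__ and the self.replacer usage
# earlier in this file, not a verbatim copy of that LU):
#
#   self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
#                                  self.op.iallocator, self.op.remote_node,
#                                  self.op.disks, False, self.op.early_release,
#                                  self.op.ignore_ipolicy)
#   self.tasklets = [self.replacer]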
11694 class LURepairNodeStorage(NoHooksLU):
11695 """Repairs the volume group on a node.
11700 def CheckArguments(self):
11701 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11703 storage_type = self.op.storage_type
11705 if (constants.SO_FIX_CONSISTENCY not in
11706 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11707 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11708 " repaired" % storage_type,
11709 errors.ECODE_INVAL)
11711 def ExpandNames(self):
11712 self.needed_locks = {
11713 locking.LEVEL_NODE: [self.op.node_name],
11716 def _CheckFaultyDisks(self, instance, node_name):
11717 """Ensure faulty disks abort the opcode or at least warn."""
11719 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11721 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11722 " node '%s'" % (instance.name, node_name),
11723 errors.ECODE_STATE)
11724 except errors.OpPrereqError, err:
11725 if self.op.ignore_consistency:
11726 self.proc.LogWarning(str(err.args[0]))
11730 def CheckPrereq(self):
11731 """Check prerequisites.
11734 # Check whether any instance on this node has faulty disks
11735 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11736 if inst.admin_state != constants.ADMINST_UP:
11738 check_nodes = set(inst.all_nodes)
11739 check_nodes.discard(self.op.node_name)
11740 for inst_node_name in check_nodes:
11741 self._CheckFaultyDisks(inst, inst_node_name)
11743 def Exec(self, feedback_fn):
11744 feedback_fn("Repairing storage unit '%s' on %s ..." %
11745 (self.op.name, self.op.node_name))
11747 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11748 result = self.rpc.call_storage_execute(self.op.node_name,
11749 self.op.storage_type, st_args,
11751 constants.SO_FIX_CONSISTENCY)
11752 result.Raise("Failed to repair storage unit '%s' on %s" %
11753 (self.op.name, self.op.node_name))
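# Illustrative sketch only (the opcode name and field names are inferred from
# the self.op attributes used above and should be treated as assumptions):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)
#
# which ends up running the SO_FIX_CONSISTENCY storage operation on the node.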
11756 class LUNodeEvacuate(NoHooksLU):
11757 """Evacuates instances off a list of nodes.
11762 _MODE2IALLOCATOR = {
11763 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11764 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11765 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11767 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11768 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11769 constants.IALLOCATOR_NEVAC_MODES)
11771 def CheckArguments(self):
11772 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11774 def ExpandNames(self):
11775 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11777 if self.op.remote_node is not None:
11778 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11779 assert self.op.remote_node
11781 if self.op.remote_node == self.op.node_name:
11782 raise errors.OpPrereqError("Can not use evacuated node as a new"
11783 " secondary node", errors.ECODE_INVAL)
11785 if self.op.mode != constants.NODE_EVAC_SEC:
11786 raise errors.OpPrereqError("Without the use of an iallocator only"
11787 " secondary instances can be evacuated",
11788 errors.ECODE_INVAL)
11791 self.share_locks = _ShareAll()
11792 self.needed_locks = {
11793 locking.LEVEL_INSTANCE: [],
11794 locking.LEVEL_NODEGROUP: [],
11795 locking.LEVEL_NODE: [],
11798 # Determine nodes (via group) optimistically, needs verification once locks
11799 # have been acquired
11800 self.lock_nodes = self._DetermineNodes()
11802 def _DetermineNodes(self):
11803 """Gets the list of nodes to operate on.
11806 if self.op.remote_node is None:
11807 # Iallocator will choose any node(s) in the same group
11808 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11810 group_nodes = frozenset([self.op.remote_node])
11812 # Determine nodes to be locked
11813 return set([self.op.node_name]) | group_nodes
11815 def _DetermineInstances(self):
11816 """Builds list of instances to operate on.
11819 assert self.op.mode in constants.NODE_EVAC_MODES
11821 if self.op.mode == constants.NODE_EVAC_PRI:
11822 # Primary instances only
11823 inst_fn = _GetNodePrimaryInstances
11824 assert self.op.remote_node is None, \
11825 "Evacuating primary instances requires iallocator"
11826 elif self.op.mode == constants.NODE_EVAC_SEC:
11827 # Secondary instances only
11828 inst_fn = _GetNodeSecondaryInstances
11831 assert self.op.mode == constants.NODE_EVAC_ALL
11832 inst_fn = _GetNodeInstances
11833 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11835 raise errors.OpPrereqError("Due to an issue with the iallocator"
11836 " interface it is not possible to evacuate"
11837 " all instances at once; specify explicitly"
11838                                  " whether to evacuate primary or secondary"
11839                                  " instances",
11840 errors.ECODE_INVAL)
11842 return inst_fn(self.cfg, self.op.node_name)
11844 def DeclareLocks(self, level):
11845 if level == locking.LEVEL_INSTANCE:
11846 # Lock instances optimistically, needs verification once node and group
11847 # locks have been acquired
11848 self.needed_locks[locking.LEVEL_INSTANCE] = \
11849 set(i.name for i in self._DetermineInstances())
11851 elif level == locking.LEVEL_NODEGROUP:
11852 # Lock node groups for all potential target nodes optimistically, needs
11853 # verification once nodes have been acquired
11854 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11855 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11857 elif level == locking.LEVEL_NODE:
11858 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11860 def CheckPrereq(self):
11862 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11863 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11864 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11866 need_nodes = self._DetermineNodes()
11868 if not owned_nodes.issuperset(need_nodes):
11869 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11870 " locks were acquired, current nodes are"
11871                                  " '%s', used to be '%s'; retry the"
11873 (self.op.node_name,
11874 utils.CommaJoin(need_nodes),
11875 utils.CommaJoin(owned_nodes)),
11876 errors.ECODE_STATE)
11878 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11879 if owned_groups != wanted_groups:
11880 raise errors.OpExecError("Node groups changed since locks were acquired,"
11881 " current groups are '%s', used to be '%s';"
11882 " retry the operation" %
11883 (utils.CommaJoin(wanted_groups),
11884 utils.CommaJoin(owned_groups)))
11886 # Determine affected instances
11887 self.instances = self._DetermineInstances()
11888 self.instance_names = [i.name for i in self.instances]
11890 if set(self.instance_names) != owned_instances:
11891 raise errors.OpExecError("Instances on node '%s' changed since locks"
11892 " were acquired, current instances are '%s',"
11893 " used to be '%s'; retry the operation" %
11894 (self.op.node_name,
11895 utils.CommaJoin(self.instance_names),
11896 utils.CommaJoin(owned_instances)))
11898 if self.instance_names:
11899 self.LogInfo("Evacuating instances from node '%s': %s",
11901 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11903 self.LogInfo("No instances to evacuate from node '%s'",
11906 if self.op.remote_node is not None:
11907 for i in self.instances:
11908 if i.primary_node == self.op.remote_node:
11909 raise errors.OpPrereqError("Node %s is the primary node of"
11910 " instance %s, cannot use it as"
11912 (self.op.remote_node, i.name),
11913 errors.ECODE_INVAL)
11915 def Exec(self, feedback_fn):
11916 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11918 if not self.instance_names:
11919 # No instances to evacuate
11922 elif self.op.iallocator is not None:
11923 # TODO: Implement relocation to other group
11924 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11925 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11926 instances=list(self.instance_names))
11927 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11929 ial.Run(self.op.iallocator)
11931 if not ial.success:
11932 raise errors.OpPrereqError("Can't compute node evacuation using"
11933 " iallocator '%s': %s" %
11934 (self.op.iallocator, ial.info),
11935 errors.ECODE_NORES)
11937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11939 elif self.op.remote_node is not None:
11940 assert self.op.mode == constants.NODE_EVAC_SEC
11941       jobs = [
11942         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11943 remote_node=self.op.remote_node,
11945 mode=constants.REPLACE_DISK_CHG,
11946 early_release=self.op.early_release)]
11947         for instance_name in self.instance_names]
11950     else:
11951       raise errors.ProgrammerError("No iallocator or remote node")
11953 return ResultWithJobs(jobs)
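# Illustrative note: the "jobs" value returned above is a list of job
# definitions, each of which is itself a list of opcodes forming one job,
# for example (instance names invented):
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#   ]
#
# i.e. one single-opcode job per evacuated instance in the remote-node case.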
11956 def _SetOpEarlyRelease(early_release, op):
11957 """Sets C{early_release} flag on opcodes if available.
11961 op.early_release = early_release
11962 except AttributeError:
11963     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11965   return op
11968 def _NodeEvacDest(use_nodes, group, nodes):
11969 """Returns group or nodes depending on caller's choice.
11972   if use_nodes:
11973     return utils.CommaJoin(nodes)
11974   else:
11975     return group
11978 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11979 """Unpacks the result of change-group and node-evacuate iallocator requests.
11981 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11982 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11984 @type lu: L{LogicalUnit}
11985 @param lu: Logical unit instance
11986 @type alloc_result: tuple/list
11987 @param alloc_result: Result from iallocator
11988 @type early_release: bool
11989 @param early_release: Whether to release locks early if possible
11990 @type use_nodes: bool
11991 @param use_nodes: Whether to display node names instead of groups
11994 (moved, failed, jobs) = alloc_result
11997 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11998 for (name, reason) in failed)
11999 lu.LogWarning("Unable to evacuate instances %s", failreason)
12000 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12003 lu.LogInfo("Instances to be moved: %s",
12004 utils.CommaJoin("%s (to %s)" %
12005 (name, _NodeEvacDest(use_nodes, group, nodes))
12006 for (name, group, nodes) in moved))
12008   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12009               map(opcodes.OpCode.LoadOpCode, ops))
12010           for ops in jobs]
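# Illustrative note: the alloc_result tuple unpacked in the function above
# has the following shape (all names and values invented for the example):
#
#   alloc_result = (
#     [("inst1", "target-group", ["node2"])],     # moved: (name, group, nodes)
#     [("inst2", "not enough memory")],           # failed: (name, reason)
#     [[op1_dict, op2_dict]],                     # jobs: serialized opcodes
#   )
#
# Each inner list of serialized opcodes is turned back into opcode objects
# and gets the early_release flag applied where the opcode supports it.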
12013 def _DiskSizeInBytesToMebibytes(lu, size):
12014 """Converts a disk size in bytes to mebibytes.
12016 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12019 (mib, remainder) = divmod(size, 1024 * 1024)
12022 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12023 " to not overwrite existing data (%s bytes will not be"
12024                   " wiped)", (1024 * 1024) - remainder)
12025     mib += 1
12027   return mib
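# Illustrative worked example for _DiskSizeInBytesToMebibytes: for a size of
# 1 GiB plus 512 bytes,
#
#   divmod(1073742336, 1024 * 1024) == (1024, 512)
#
# so the function warns that 1048064 bytes (1048576 - 512) will not be wiped
# and returns 1025 MiB after rounding up.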
12030 class LUInstanceGrowDisk(LogicalUnit):
12031 """Grow a disk of an instance.
12034 HPATH = "disk-grow"
12035 HTYPE = constants.HTYPE_INSTANCE
12038 def ExpandNames(self):
12039 self._ExpandAndLockInstance()
12040 self.needed_locks[locking.LEVEL_NODE] = []
12041 self.needed_locks[locking.LEVEL_NODE_RES] = []
12042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12043 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12045 def DeclareLocks(self, level):
12046 if level == locking.LEVEL_NODE:
12047 self._LockInstancesNodes()
12048 elif level == locking.LEVEL_NODE_RES:
12050 self.needed_locks[locking.LEVEL_NODE_RES] = \
12051 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12053 def BuildHooksEnv(self):
12054 """Build hooks env.
12056 This runs on the master, the primary and all the secondaries.
12060 "DISK": self.op.disk,
12061 "AMOUNT": self.op.amount,
12062 "ABSOLUTE": self.op.absolute,
12064 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12067 def BuildHooksNodes(self):
12068 """Build hooks nodes.
12071 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12074 def CheckPrereq(self):
12075 """Check prerequisites.
12077 This checks that the instance is in the cluster.
12080 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12081 assert instance is not None, \
12082 "Cannot retrieve locked instance %s" % self.op.instance_name
12083 nodenames = list(instance.all_nodes)
12084 for node in nodenames:
12085 _CheckNodeOnline(self, node)
12087 self.instance = instance
12089 if instance.disk_template not in constants.DTS_GROWABLE:
12090 raise errors.OpPrereqError("Instance's disk layout does not support"
12091 " growing", errors.ECODE_INVAL)
12093 self.disk = instance.FindDisk(self.op.disk)
12095 if self.op.absolute:
12096 self.target = self.op.amount
12097 self.delta = self.target - self.disk.size
12099 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12100 "current disk size (%s)" %
12101 (utils.FormatUnit(self.target, "h"),
12102 utils.FormatUnit(self.disk.size, "h")),
12103 errors.ECODE_STATE)
12105 self.delta = self.op.amount
12106 self.target = self.disk.size + self.delta
12108 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12109 utils.FormatUnit(self.delta, "h"),
12110 errors.ECODE_INVAL)
12112 if instance.disk_template not in (constants.DT_FILE,
12113 constants.DT_SHARED_FILE,
12115 # TODO: check the free disk space for file, when that feature will be
12117 _CheckNodesFreeDiskPerVG(self, nodenames,
12118 self.disk.ComputeGrowth(self.delta))
12120 def Exec(self, feedback_fn):
12121 """Execute disk grow.
12124 instance = self.instance
12127 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12128 assert (self.owned_locks(locking.LEVEL_NODE) ==
12129 self.owned_locks(locking.LEVEL_NODE_RES))
12131 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12133 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12135 raise errors.OpExecError("Cannot activate block device to grow")
12137 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12138 (self.op.disk, instance.name,
12139 utils.FormatUnit(self.delta, "h"),
12140 utils.FormatUnit(self.target, "h")))
12142 # First run all grow ops in dry-run mode
12143 for node in instance.all_nodes:
12144 self.cfg.SetDiskID(disk, node)
12145 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12147 result.Raise("Dry-run grow request failed to node %s" % node)
12150 # Get disk size from primary node for wiping
12151 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12152 result.Raise("Failed to retrieve disk size from node '%s'" %
12153 instance.primary_node)
12155 (disk_size_in_bytes, ) = result.payload
12157 if disk_size_in_bytes is None:
12158 raise errors.OpExecError("Failed to retrieve disk size from primary"
12159 " node '%s'" % instance.primary_node)
12161 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12163 assert old_disk_size >= disk.size, \
12164 ("Retrieved disk size too small (got %s, should be at least %s)" %
12165 (old_disk_size, disk.size))
12167 old_disk_size = None
12169 # We know that (as far as we can test) operations across different
12170 # nodes will succeed, time to run it for real on the backing storage
12171 for node in instance.all_nodes:
12172 self.cfg.SetDiskID(disk, node)
12173 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12175 result.Raise("Grow request failed to node %s" % node)
12177 # And now execute it for logical storage, on the primary node
12178 node = instance.primary_node
12179 self.cfg.SetDiskID(disk, node)
12180 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12182 result.Raise("Grow request failed to node %s" % node)
12184 disk.RecordGrow(self.delta)
12185 self.cfg.Update(instance, feedback_fn)
12187 # Changes have been recorded, release node lock
12188 _ReleaseLocks(self, locking.LEVEL_NODE)
12190 # Downgrade lock while waiting for sync
12191 self.glm.downgrade(locking.LEVEL_INSTANCE)
12193 assert wipe_disks ^ (old_disk_size is None)
12196 assert instance.disks[self.op.disk] == disk
12198 # Wipe newly added disk space
12199 _WipeDisks(self, instance,
12200 disks=[(self.op.disk, disk, old_disk_size)])
12202 if self.op.wait_for_sync:
12203 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12205 self.proc.LogWarning("Disk sync-ing has not returned a good"
12206 " status; please check the instance")
12207 if instance.admin_state != constants.ADMINST_UP:
12208 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12209 elif instance.admin_state != constants.ADMINST_UP:
12210 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12211 " not supposed to be running because no wait for"
12212 " sync mode was requested")
12214 assert self.owned_locks(locking.LEVEL_NODE_RES)
12215 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
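# Illustrative sketch only (field names inferred from the self.op attributes
# used by LUInstanceGrowDisk above; values are invented):
#
#   opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                              disk=0,          # disk index within the instance
#                              amount=2048,     # MiB; a delta unless absolute
#                              absolute=False,
#                              wait_for_sync=True)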
12218 class LUInstanceQueryData(NoHooksLU):
12219 """Query runtime instance data.
12224 def ExpandNames(self):
12225 self.needed_locks = {}
12227 # Use locking if requested or when non-static information is wanted
12228 if not (self.op.static or self.op.use_locking):
12229 self.LogWarning("Non-static data requested, locks need to be acquired")
12230 self.op.use_locking = True
12232 if self.op.instances or not self.op.use_locking:
12233 # Expand instance names right here
12234 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12236 # Will use acquired locks
12237 self.wanted_names = None
12239 if self.op.use_locking:
12240 self.share_locks = _ShareAll()
12242 if self.wanted_names is None:
12243 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12245 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12247 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12248 self.needed_locks[locking.LEVEL_NODE] = []
12249 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12251 def DeclareLocks(self, level):
12252 if self.op.use_locking:
12253 if level == locking.LEVEL_NODEGROUP:
12254 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12256 # Lock all groups used by instances optimistically; this requires going
12257 # via the node before it's locked, requiring verification later on
12258 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12259 frozenset(group_uuid
12260 for instance_name in owned_instances
12262 self.cfg.GetInstanceNodeGroups(instance_name))
12264 elif level == locking.LEVEL_NODE:
12265 self._LockInstancesNodes()
12267 def CheckPrereq(self):
12268 """Check prerequisites.
12270 This only checks the optional instance list against the existing names.
12273 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12274 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12275 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12277 if self.wanted_names is None:
12278 assert self.op.use_locking, "Locking was not used"
12279 self.wanted_names = owned_instances
12281 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12283 if self.op.use_locking:
12284 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12287 assert not (owned_instances or owned_groups or owned_nodes)
12289 self.wanted_instances = instances.values()
12291 def _ComputeBlockdevStatus(self, node, instance, dev):
12292 """Returns the status of a block device
12295 if self.op.static or not node:
12298 self.cfg.SetDiskID(dev, node)
12300 result = self.rpc.call_blockdev_find(node, dev)
12304 result.Raise("Can't compute disk status for %s" % instance.name)
12306 status = result.payload
12310 return (status.dev_path, status.major, status.minor,
12311 status.sync_percent, status.estimated_time,
12312 status.is_degraded, status.ldisk_status)
12314 def _ComputeDiskStatus(self, instance, snode, dev):
12315 """Compute block device status.
12318 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12320 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12322 def _ComputeDiskStatusInner(self, instance, snode, dev):
12323 """Compute block device status.
12325 @attention: The device has to be annotated already.
12328 if dev.dev_type in constants.LDS_DRBD:
12329 # we change the snode then (otherwise we use the one passed in)
12330 if dev.logical_id[0] == instance.primary_node:
12331 snode = dev.logical_id[1]
12333 snode = dev.logical_id[0]
12335 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12337 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12340 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12347 "iv_name": dev.iv_name,
12348 "dev_type": dev.dev_type,
12349 "logical_id": dev.logical_id,
12350 "physical_id": dev.physical_id,
12351 "pstatus": dev_pstatus,
12352 "sstatus": dev_sstatus,
12353 "children": dev_children,
12358 def Exec(self, feedback_fn):
12359 """Gather and return data"""
12362 cluster = self.cfg.GetClusterInfo()
12364 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12365 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12367 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12368 for node in nodes.values()))
12370 group2name_fn = lambda uuid: groups[uuid].name
12372 for instance in self.wanted_instances:
12373 pnode = nodes[instance.primary_node]
12375 if self.op.static or pnode.offline:
12376 remote_state = None
12378 self.LogWarning("Primary node %s is marked offline, returning static"
12379 " information only for instance %s" %
12380 (pnode.name, instance.name))
12382 remote_info = self.rpc.call_instance_info(instance.primary_node,
12384 instance.hypervisor)
12385 remote_info.Raise("Error checking node %s" % instance.primary_node)
12386 remote_info = remote_info.payload
12387 if remote_info and "state" in remote_info:
12388 remote_state = "up"
12390 if instance.admin_state == constants.ADMINST_UP:
12391 remote_state = "down"
12393 remote_state = instance.admin_state
12395 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12398 snodes_group_uuids = [nodes[snode_name].group
12399 for snode_name in instance.secondary_nodes]
12401 result[instance.name] = {
12402 "name": instance.name,
12403 "config_state": instance.admin_state,
12404 "run_state": remote_state,
12405 "pnode": instance.primary_node,
12406 "pnode_group_uuid": pnode.group,
12407 "pnode_group_name": group2name_fn(pnode.group),
12408 "snodes": instance.secondary_nodes,
12409 "snodes_group_uuids": snodes_group_uuids,
12410 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12412 # this happens to be the same format used for hooks
12413 "nics": _NICListToTuple(self, instance.nics),
12414 "disk_template": instance.disk_template,
12416 "hypervisor": instance.hypervisor,
12417 "network_port": instance.network_port,
12418 "hv_instance": instance.hvparams,
12419 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12420 "be_instance": instance.beparams,
12421 "be_actual": cluster.FillBE(instance),
12422 "os_instance": instance.osparams,
12423 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12424 "serial_no": instance.serial_no,
12425 "mtime": instance.mtime,
12426 "ctime": instance.ctime,
12427 "uuid": instance.uuid,
12433 def PrepareContainerMods(mods, private_fn):
12434 """Prepares a list of container modifications by adding a private data field.
12436 @type mods: list of tuples; (operation, index, parameters)
12437 @param mods: List of modifications
12438 @type private_fn: callable or None
12439 @param private_fn: Callable for constructing a private data field for a
12444 if private_fn is None:
12449 return [(op, idx, params, fn()) for (op, idx, params) in mods]
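# Illustrative usage (hypothetical values): with modifications given in the
# (operation, index, parameters) form used throughout this module,
#
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}),
#           (constants.DDM_REMOVE, 0, {})]
#   PrepareContainerMods(mods, None)
#
# returns the same tuples extended with a private data object as the fourth
# element (None here, since no private_fn was given), ready to be handed to
# ApplyContainerMods.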
12452 #: Type description for changes as returned by L{ApplyContainerMods}'s
12454 _TApplyContModsCbChanges = \
12455 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12456 ht.TNonEmptyString,
12461 def ApplyContainerMods(kind, container, chgdesc, mods,
12462 create_fn, modify_fn, remove_fn):
12463 """Applies descriptions in C{mods} to C{container}.
12466 @param kind: One-word item description
12467 @type container: list
12468 @param container: Container to modify
12469 @type chgdesc: None or list
12470 @param chgdesc: List of applied changes
12472 @param mods: Modifications as returned by L{PrepareContainerMods}
12473 @type create_fn: callable
12474 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12475 receives absolute item index, parameters and private data object as added
12476 by L{PrepareContainerMods}, returns tuple containing new item and changes
12478 @type modify_fn: callable
12479 @param modify_fn: Callback for modifying an existing item
12480 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12481 and private data object as added by L{PrepareContainerMods}, returns
12483 @type remove_fn: callable
12484 @param remove_fn: Callback on removing item; receives absolute item index,
12485 item and private data object as added by L{PrepareContainerMods}
12488 for (op, idx, params, private) in mods:
12491 absidx = len(container) - 1
12493 raise IndexError("Not accepting negative indices other than -1")
12494 elif idx > len(container):
12495 raise IndexError("Got %s index %s, but there are only %s" %
12496 (kind, idx, len(container)))
12502 if op == constants.DDM_ADD:
12503 # Calculate where item will be added
12505 addidx = len(container)
12509 if create_fn is None:
12512 (item, changes) = create_fn(addidx, params, private)
12515 container.append(item)
12518 assert idx <= len(container)
12519 # list.insert does so before the specified index
12520 container.insert(idx, item)
12522 # Retrieve existing item
12524 item = container[absidx]
12526 raise IndexError("Invalid %s index %s" % (kind, idx))
12528 if op == constants.DDM_REMOVE:
12531 if remove_fn is not None:
12532 remove_fn(absidx, item, private)
12534 changes = [("%s/%s" % (kind, absidx), "remove")]
12536 assert container[absidx] == item
12537 del container[absidx]
12538 elif op == constants.DDM_MODIFY:
12539 if modify_fn is not None:
12540 changes = modify_fn(absidx, item, params, private)
12542 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12544 assert _TApplyContModsCbChanges(changes)
12546 if not (chgdesc is None or changes is None):
12547 chgdesc.extend(changes)
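# Illustrative usage (simplified, hypothetical values): with no callbacks the
# parameters themselves become the new items, so
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c")], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#
# leaves container as ["a", "b", "c"]; chgdesc would collect any changes
# reported by create/modify/remove callbacks (none in this sketch).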
12550 def _UpdateIvNames(base_index, disks):
12551 """Updates the C{iv_name} attribute of disks.
12553 @type disks: list of L{objects.Disk}
12556 for (idx, disk) in enumerate(disks):
12557 disk.iv_name = "disk/%s" % (base_index + idx, )
12560 class _InstNicModPrivate:
12561 """Data structure for network interface modifications.
12563 Used by L{LUInstanceSetParams}.
12566 def __init__(self):
12571 class LUInstanceSetParams(LogicalUnit):
12572 """Modifies an instances's parameters.
12575 HPATH = "instance-modify"
12576 HTYPE = constants.HTYPE_INSTANCE
12580 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12581 assert ht.TList(mods)
12582 assert not mods or len(mods[0]) in (2, 3)
12584 if mods and len(mods[0]) == 2:
12588 for op, params in mods:
12589 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12590 result.append((op, -1, params))
12594 raise errors.OpPrereqError("Only one %s add or remove operation is"
12595 " supported at a time" % kind,
12596 errors.ECODE_INVAL)
12598 result.append((constants.DDM_MODIFY, op, params))
12600 assert verify_fn(result)
12607 def _CheckMods(kind, mods, key_types, item_fn):
12608 """Ensures requested disk/NIC modifications are valid.
12611 for (op, _, params) in mods:
12612 assert ht.TDict(params)
12614 utils.ForceDictType(params, key_types)
12616 if op == constants.DDM_REMOVE:
12618 raise errors.OpPrereqError("No settings should be passed when"
12619 " removing a %s" % kind,
12620 errors.ECODE_INVAL)
12621 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12622 item_fn(op, params)
12624 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12627 def _VerifyDiskModification(op, params):
12628 """Verifies a disk modification.
12631 if op == constants.DDM_ADD:
12632 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12633 if mode not in constants.DISK_ACCESS_SET:
12634 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12635 errors.ECODE_INVAL)
12637 size = params.get(constants.IDISK_SIZE, None)
12639 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12640 constants.IDISK_SIZE, errors.ECODE_INVAL)
12644 except (TypeError, ValueError), err:
12645 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12646 errors.ECODE_INVAL)
12648 params[constants.IDISK_SIZE] = size
12650 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12651 raise errors.OpPrereqError("Disk size change not possible, use"
12652 " grow-disk", errors.ECODE_INVAL)
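  # Illustrative examples of disk modifications accepted by the check above
  # (values invented):
  #
  #   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024,
  #                            constants.IDISK_MODE: constants.DISK_RDWR})
  #   (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: constants.DISK_RDONLY})
  #
  # Adding a disk requires a size; size changes must go through grow-disk.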
12655 def _VerifyNicModification(op, params):
12656 """Verifies a network interface modification.
12659 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12660 ip = params.get(constants.INIC_IP, None)
12663 elif ip.lower() == constants.VALUE_NONE:
12664 params[constants.INIC_IP] = None
12665 elif not netutils.IPAddress.IsValid(ip):
12666 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12667 errors.ECODE_INVAL)
12669 bridge = params.get("bridge", None)
12670 link = params.get(constants.INIC_LINK, None)
12671 if bridge and link:
12672 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12673 " at the same time", errors.ECODE_INVAL)
12674 elif bridge and bridge.lower() == constants.VALUE_NONE:
12675 params["bridge"] = None
12676 elif link and link.lower() == constants.VALUE_NONE:
12677 params[constants.INIC_LINK] = None
12679 if op == constants.DDM_ADD:
12680 macaddr = params.get(constants.INIC_MAC, None)
12681 if macaddr is None:
12682 params[constants.INIC_MAC] = constants.VALUE_AUTO
12684 if constants.INIC_MAC in params:
12685 macaddr = params[constants.INIC_MAC]
12686 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12687 macaddr = utils.NormalizeAndValidateMac(macaddr)
12689 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12690 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12691 " modifying an existing NIC",
12692 errors.ECODE_INVAL)
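  # Illustrative examples of NIC modifications accepted by the check above
  # (values invented):
  #
  #   (constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO,
  #                            constants.INIC_LINK: "br0"})
  #   (constants.DDM_MODIFY, 0, {constants.INIC_IP: "none"})   # clears the IP
  #
  # "bridge" is accepted as a legacy alias for the link parameter, and the
  # string "none" is normalised to None for the IP, bridge and link values.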
12694 def CheckArguments(self):
12695 if not (self.op.nics or self.op.disks or self.op.disk_template or
12696 self.op.hvparams or self.op.beparams or self.op.os_name or
12697 self.op.offline is not None or self.op.runtime_mem):
12698 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12700 if self.op.hvparams:
12701 _CheckGlobalHvParams(self.op.hvparams)
12703 self.op.disks = self._UpgradeDiskNicMods(
12704 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12705 self.op.nics = self._UpgradeDiskNicMods(
12706 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12708 # Check disk modifications
12709 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12710 self._VerifyDiskModification)
12712 if self.op.disks and self.op.disk_template is not None:
12713 raise errors.OpPrereqError("Disk template conversion and other disk"
12714 " changes not supported at the same time",
12715 errors.ECODE_INVAL)
12717 if (self.op.disk_template and
12718 self.op.disk_template in constants.DTS_INT_MIRROR and
12719 self.op.remote_node is None):
12720 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12721 " one requires specifying a secondary node",
12722 errors.ECODE_INVAL)
12724 # Check NIC modifications
12725 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12726 self._VerifyNicModification)
12728 def ExpandNames(self):
12729 self._ExpandAndLockInstance()
12730 # Can't even acquire node locks in shared mode as upcoming changes in
12731 # Ganeti 2.6 will start to modify the node object on disk conversion
12732 self.needed_locks[locking.LEVEL_NODE] = []
12733 self.needed_locks[locking.LEVEL_NODE_RES] = []
12734 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12736 def DeclareLocks(self, level):
12737 # TODO: Acquire group lock in shared mode (disk parameters)
12738 if level == locking.LEVEL_NODE:
12739 self._LockInstancesNodes()
12740 if self.op.disk_template and self.op.remote_node:
12741 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12742 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12743 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12745 self.needed_locks[locking.LEVEL_NODE_RES] = \
12746 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12748 def BuildHooksEnv(self):
12749 """Build hooks env.
12751 This runs on the master, primary and secondaries.
12755 if constants.BE_MINMEM in self.be_new:
12756 args["minmem"] = self.be_new[constants.BE_MINMEM]
12757 if constants.BE_MAXMEM in self.be_new:
12758 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12759 if constants.BE_VCPUS in self.be_new:
12760 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12761 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12762 # information at all.
12764 if self._new_nics is not None:
12767 for nic in self._new_nics:
12768 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12769 mode = nicparams[constants.NIC_MODE]
12770 link = nicparams[constants.NIC_LINK]
12771 nics.append((nic.ip, nic.mac, mode, link))
12773 args["nics"] = nics
12775 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12776 if self.op.disk_template:
12777 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12778 if self.op.runtime_mem:
12779 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12783 def BuildHooksNodes(self):
12784 """Build hooks nodes.
12787 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12790 def _PrepareNicModification(self, params, private, old_ip, old_params,
12792 update_params_dict = dict([(key, params[key])
12793 for key in constants.NICS_PARAMETERS
12796 if "bridge" in params:
12797 update_params_dict[constants.NIC_LINK] = params["bridge"]
12799 new_params = _GetUpdatedParams(old_params, update_params_dict)
12800 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12802 new_filled_params = cluster.SimpleFillNIC(new_params)
12803 objects.NIC.CheckParameterSyntax(new_filled_params)
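# With the defaults-filled parameters validated, apply mode-specific checks:
# bridged NICs need the bridge to exist on the primary node, routed NICs
# need an IP address.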
12805 new_mode = new_filled_params[constants.NIC_MODE]
12806 if new_mode == constants.NIC_MODE_BRIDGED:
12807 bridge = new_filled_params[constants.NIC_LINK]
12808 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12810 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12812 self.warn.append(msg)
12814 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12816 elif new_mode == constants.NIC_MODE_ROUTED:
12817 ip = params.get(constants.INIC_IP, old_ip)
12819 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12820 " on a routed NIC", errors.ECODE_INVAL)
12822 if constants.INIC_MAC in params:
12823 mac = params[constants.INIC_MAC]
12825 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12826 errors.ECODE_INVAL)
12827 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12828 # otherwise generate the MAC address
12829 params[constants.INIC_MAC] = \
12830 self.cfg.GenerateMAC(self.proc.GetECId())
12832 # or validate/reserve the current one
12834 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12835 except errors.ReservationError:
12836 raise errors.OpPrereqError("MAC address '%s' already in use"
12837 " in cluster" % mac,
12838 errors.ECODE_NOTUNIQUE)
12840 private.params = new_params
12841 private.filled = new_filled_params
12843 def CheckPrereq(self):
12844 """Check prerequisites.
12846 This checks the requested modifications against the current instance and cluster state.
12849 # checking the new params on the primary/secondary nodes
12851 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12852 cluster = self.cluster = self.cfg.GetClusterInfo()
12853 assert self.instance is not None, \
12854 "Cannot retrieve locked instance %s" % self.op.instance_name
12855 pnode = instance.primary_node
12856 nodelist = list(instance.all_nodes)
12857 pnode_info = self.cfg.GetNodeInfo(pnode)
12858 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12860 # Prepare disk/NIC modifications
12861 self.diskmod = PrepareContainerMods(self.op.disks, None)
12862 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
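# Each NIC modification gets a private _InstNicModPrivate container, used to
# carry the validated/filled parameters from CheckPrereq into Exec.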
12865 if self.op.os_name and not self.op.force:
12866 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12867 self.op.force_variant)
12868 instance_os = self.op.os_name
12870 instance_os = instance.os
12872 assert not (self.op.disk_template and self.op.disks), \
12873 "Can't modify disk template and apply disk changes at the same time"
12875 if self.op.disk_template:
12876 if instance.disk_template == self.op.disk_template:
12877 raise errors.OpPrereqError("Instance already has disk template %s" %
12878 instance.disk_template, errors.ECODE_INVAL)
12880 if (instance.disk_template,
12881 self.op.disk_template) not in self._DISK_CONVERSIONS:
12882 raise errors.OpPrereqError("Unsupported disk template conversion from"
12883 " %s to %s" % (instance.disk_template,
12884 self.op.disk_template),
12885 errors.ECODE_INVAL)
12886 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12887 msg="cannot change disk template")
12888 if self.op.disk_template in constants.DTS_INT_MIRROR:
12889 if self.op.remote_node == pnode:
12890 raise errors.OpPrereqError("Given new secondary node %s is the same"
12891 " as the primary node of the instance" %
12892 self.op.remote_node, errors.ECODE_STATE)
12893 _CheckNodeOnline(self, self.op.remote_node)
12894 _CheckNodeNotDrained(self, self.op.remote_node)
12895 # FIXME: here we assume that the old instance type is DT_PLAIN
12896 assert instance.disk_template == constants.DT_PLAIN
12897 disks = [{constants.IDISK_SIZE: d.size,
12898 constants.IDISK_VG: d.logical_id[0]}
12899 for d in instance.disks]
12900 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12901 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12903 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12904 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12905 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12907 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12908 ignore=self.op.ignore_ipolicy)
12909 if pnode_info.group != snode_info.group:
12910 self.LogWarning("The primary and secondary nodes are in two"
12911 " different node groups; the disk parameters"
12912 " from the first disk's node group will be"
12915 # hvparams processing
12916 if self.op.hvparams:
12917 hv_type = instance.hypervisor
12918 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12919 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12920 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12923 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12924 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12925 self.hv_proposed = self.hv_new = hv_new # the new actual values
12926 self.hv_inst = i_hvdict # the new dict (without defaults)
12928 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12930 self.hv_new = self.hv_inst = {}
12932 # beparams processing
12933 if self.op.beparams:
12934 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12936 objects.UpgradeBeParams(i_bedict)
12937 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12938 be_new = cluster.SimpleFillBE(i_bedict)
12939 self.be_proposed = self.be_new = be_new # the new actual values
12940 self.be_inst = i_bedict # the new dict (without defaults)
12942 self.be_new = self.be_inst = {}
12943 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12944 be_old = cluster.FillBE(instance)
12946 # CPU param validation -- checking every time a parameter is
12947 # changed to cover all cases where either CPU mask or vcpus have
12949 if (constants.BE_VCPUS in self.be_proposed and
12950 constants.HV_CPU_MASK in self.hv_proposed):
12952 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12953 # Verify mask is consistent with number of vCPUs. Can skip this
12954 # test if only 1 entry in the CPU mask, which means same mask
12955 # is applied to all vCPUs.
12956 if (len(cpu_list) > 1 and
12957 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12958 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12960 (self.be_proposed[constants.BE_VCPUS],
12961 self.hv_proposed[constants.HV_CPU_MASK]),
12962 errors.ECODE_INVAL)
12964 # Only perform this test if a new CPU mask is given
12965 if constants.HV_CPU_MASK in self.hv_new:
12966 # Calculate the largest CPU number requested
12967 max_requested_cpu = max(map(max, cpu_list))
12968 # Check that all of the instance's nodes have enough physical CPUs to
12969 # satisfy the requested CPU mask
12970 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12971 max_requested_cpu + 1, instance.hypervisor)
12973 # osparams processing
12974 if self.op.osparams:
12975 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12976 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12977 self.os_inst = i_osdict # the new dict (without defaults)
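# Memory checks: increasing maxmem must still fit into the free memory of
# the primary node (and of the secondaries when auto_balance is set),
# otherwise the instance could no longer be started or failed over.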
12983 #TODO(dynmem): do the appropriate check involving MINMEM
12984 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12985 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12986 mem_check_list = [pnode]
12987 if be_new[constants.BE_AUTO_BALANCE]:
12988 # either we changed auto_balance to yes or it was from before
12989 mem_check_list.extend(instance.secondary_nodes)
12990 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12991 instance.hypervisor)
12992 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12993 [instance.hypervisor])
12994 pninfo = nodeinfo[pnode]
12995 msg = pninfo.fail_msg
12997 # Assume the primary node is unreachable and go ahead
12998 self.warn.append("Can't get info from primary node %s: %s" %
13001 (_, _, (pnhvinfo, )) = pninfo.payload
13002 if not isinstance(pnhvinfo.get("memory_free", None), int):
13003 self.warn.append("Node data from primary node %s doesn't contain"
13004 " free memory information" % pnode)
13005 elif instance_info.fail_msg:
13006 self.warn.append("Can't get instance runtime information: %s" %
13007 instance_info.fail_msg)
13009 if instance_info.payload:
13010 current_mem = int(instance_info.payload["memory"])
13012 # Assume instance not running
13013 # (there is a slight race condition here, but it's not very
13014 # probable, and we have no other way to check)
13015 # TODO: Describe race condition
13017 #TODO(dynmem): do the appropriate check involving MINMEM
13018 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13019 pnhvinfo["memory_free"])
13021 raise errors.OpPrereqError("This change will prevent the instance"
13022 " from starting, due to %d MB of memory"
13023 " missing on its primary node" %
13024 miss_mem, errors.ECODE_NORES)
13026 if be_new[constants.BE_AUTO_BALANCE]:
13027 for node, nres in nodeinfo.items():
13028 if node not in instance.secondary_nodes:
13030 nres.Raise("Can't get info from secondary node %s" % node,
13031 prereq=True, ecode=errors.ECODE_STATE)
13032 (_, _, (nhvinfo, )) = nres.payload
13033 if not isinstance(nhvinfo.get("memory_free", None), int):
13034 raise errors.OpPrereqError("Secondary node %s didn't return free"
13035 " memory information" % node,
13036 errors.ECODE_STATE)
13037 #TODO(dynmem): do the appropriate check involving MINMEM
13038 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13039 raise errors.OpPrereqError("This change will prevent the instance"
13040 " from failover to its secondary node"
13041 " %s, due to not enough memory" % node,
13042 errors.ECODE_STATE)
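# Runtime memory ballooning: the instance must be running and the requested
# amount must stay within [minmem, maxmem] unless --force is given.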
13044 if self.op.runtime_mem:
13045 remote_info = self.rpc.call_instance_info(instance.primary_node,
13047 instance.hypervisor)
13048 remote_info.Raise("Error checking node %s" % instance.primary_node)
13049 if not remote_info.payload: # not running already
13050 raise errors.OpPrereqError("Instance %s is not running" %
13051 instance.name, errors.ECODE_STATE)
13053 current_memory = remote_info.payload["memory"]
13054 if (not self.op.force and
13055 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13056 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13057 raise errors.OpPrereqError("Instance %s must have memory between %d"
13058 " and %d MB of memory unless --force is"
13061 self.be_proposed[constants.BE_MINMEM],
13062 self.be_proposed[constants.BE_MAXMEM]),
13063 errors.ECODE_INVAL)
13065 delta = self.op.runtime_mem - current_memory
13067 _CheckNodeFreeMemory(self, instance.primary_node,
13068 "ballooning memory for instance %s" %
13069 instance.name, delta, instance.hypervisor)
13071 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13072 raise errors.OpPrereqError("Disk operations not supported for"
13073 " diskless instances", errors.ECODE_INVAL)
13075 def _PrepareNicCreate(_, params, private):
13076 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13077 return (None, None)
13079 def _PrepareNicMod(_, nic, params, private):
13080 self._PrepareNicModification(params, private, nic.ip,
13081 nic.nicparams, cluster, pnode)
13084 # Verify NIC changes (operating on copy)
13085 nics = instance.nics[:]
13086 ApplyContainerMods("NIC", nics, None, self.nicmod,
13087 _PrepareNicCreate, _PrepareNicMod, None)
13088 if len(nics) > constants.MAX_NICS:
13089 raise errors.OpPrereqError("Instance has too many network interfaces"
13090 " (%d), cannot add more" % constants.MAX_NICS,
13091 errors.ECODE_STATE)
13093 # Verify disk changes (operating on a copy)
13094 disks = instance.disks[:]
13095 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13096 if len(disks) > constants.MAX_DISKS:
13097 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13098 " more" % constants.MAX_DISKS,
13099 errors.ECODE_STATE)
13101 if self.op.offline is not None:
13102 if self.op.offline:
13103 msg = "can't change to offline"
13105 msg = "can't change to online"
13106 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13108 # Pre-compute NIC changes (necessary to use result in hooks)
13109 self._nic_chgdesc = []
13111 # Operate on copies as this is still in prereq
13112 nics = [nic.Copy() for nic in instance.nics]
13113 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13114 self._CreateNewNic, self._ApplyNicMods, None)
13115 self._new_nics = nics
13117 self._new_nics = None
13119 def _ConvertPlainToDrbd(self, feedback_fn):
13120 """Converts an instance from plain to drbd.
13123 feedback_fn("Converting template to drbd")
13124 instance = self.instance
13125 pnode = instance.primary_node
13126 snode = self.op.remote_node
13128 assert instance.disk_template == constants.DT_PLAIN
13130 # create a fake disk info for _GenerateDiskTemplate
13131 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13132 constants.IDISK_VG: d.logical_id[0]}
13133 for d in instance.disks]
13134 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13135 instance.name, pnode, [snode],
13136 disk_info, None, None, 0, feedback_fn,
13138 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13140 info = _GetInstanceInfoText(instance)
13141 feedback_fn("Creating additional volumes...")
13142 # first, create the missing data and meta devices
13143 for disk in anno_disks:
13144 # unfortunately this is... not too nice
13145 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13147 for child in disk.children:
13148 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13149 # at this stage, all new LVs have been created, we can rename the old ones
13151 feedback_fn("Renaming original volumes...")
13152 rename_list = [(o, n.children[0].logical_id)
13153 for (o, n) in zip(instance.disks, new_disks)]
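# The original LVs become the data children (index 0) of the new DRBD
# disks, so rename them to the logical IDs generated for those children.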
13154 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13155 result.Raise("Failed to rename original LVs")
13157 feedback_fn("Initializing DRBD devices...")
13158 # all child devices are in place, we can now create the DRBD devices
13159 for disk in anno_disks:
13160 for node in [pnode, snode]:
13161 f_create = node == pnode
13162 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13164 # at this point, the instance has been modified
13165 instance.disk_template = constants.DT_DRBD8
13166 instance.disks = new_disks
13167 self.cfg.Update(instance, feedback_fn)
13169 # Release node locks while waiting for sync
13170 _ReleaseLocks(self, locking.LEVEL_NODE)
13172 # disks are created, waiting for sync
13173 disk_abort = not _WaitForSync(self, instance,
13174 oneshot=not self.op.wait_for_sync)
13176 raise errors.OpExecError("There are some degraded disks for"
13177 " this instance, please cleanup manually")
13179 # Node resource locks will be released by caller
13181 def _ConvertDrbdToPlain(self, feedback_fn):
13182 """Converts an instance from drbd to plain.
13185 instance = self.instance
13187 assert len(instance.secondary_nodes) == 1
13188 assert instance.disk_template == constants.DT_DRBD8
13190 pnode = instance.primary_node
13191 snode = instance.secondary_nodes[0]
13192 feedback_fn("Converting template to plain")
13194 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13195 new_disks = [d.children[0] for d in instance.disks]
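# For DRBD8 the first child of each disk is the local data LV; it is reused
# as the plain disk, while the meta LV (children[1]) is removed below.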
13197 # copy over size and mode
13198 for parent, child in zip(old_disks, new_disks):
13199 child.size = parent.size
13200 child.mode = parent.mode
13202 # this is a DRBD disk, return its port to the pool
13203 # NOTE: this must be done right before the call to cfg.Update!
13204 for disk in old_disks:
13205 tcp_port = disk.logical_id[2]
13206 self.cfg.AddTcpUdpPort(tcp_port)
13208 # update instance structure
13209 instance.disks = new_disks
13210 instance.disk_template = constants.DT_PLAIN
13211 self.cfg.Update(instance, feedback_fn)
13213 # Release locks in case removing disks takes a while
13214 _ReleaseLocks(self, locking.LEVEL_NODE)
13216 feedback_fn("Removing volumes on the secondary node...")
13217 for disk in old_disks:
13218 self.cfg.SetDiskID(disk, snode)
13219 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13221 self.LogWarning("Could not remove block device %s on node %s,"
13222 " continuing anyway: %s", disk.iv_name, snode, msg)
13224 feedback_fn("Removing unneeded volumes on the primary node...")
13225 for idx, disk in enumerate(old_disks):
13226 meta = disk.children[1]
13227 self.cfg.SetDiskID(meta, pnode)
13228 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13230 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13231 " continuing anyway: %s", idx, pnode, msg)
13233 def _CreateNewDisk(self, idx, params, _):
13234 """Creates a new disk.
13237 instance = self.instance
13240 if instance.disk_template in constants.DTS_FILEBASED:
13241 (file_driver, file_path) = instance.disks[0].logical_id
13242 file_path = os.path.dirname(file_path)
13244 file_driver = file_path = None
13247 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13248 instance.primary_node, instance.secondary_nodes,
13249 [params], file_path, file_driver, idx,
13250 self.Log, self.diskparams)[0]
13252 info = _GetInstanceInfoText(instance)
13254 logging.info("Creating volume %s for instance %s",
13255 disk.iv_name, instance.name)
13256 # Note: this needs to be kept in sync with _CreateDisks
13258 for node in instance.all_nodes:
13259 f_create = (node == instance.primary_node)
13261 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13262 except errors.OpExecError, err:
13263 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13264 disk.iv_name, disk, node, err)
13267 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13271 def _ModifyDisk(idx, disk, params, _):
13272 """Modifies a disk.
13275 disk.mode = params[constants.IDISK_MODE]
13278 ("disk.mode/%d" % idx, disk.mode),
13281 def _RemoveDisk(self, idx, root, _):
13285 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13286 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13287 self.cfg.SetDiskID(disk, node)
13288 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13290 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13291 " continuing anyway", idx, node, msg)
13293 # if this is a DRBD disk, return its port to the pool
13294 if root.dev_type in constants.LDS_DRBD:
13295 self.cfg.AddTcpUdpPort(root.logical_id[2])
13298 def _CreateNewNic(idx, params, private):
13299 """Creates data structure for a new network interface.
13302 mac = params[constants.INIC_MAC]
13303 ip = params.get(constants.INIC_IP, None)
13304 nicparams = private.params
13306 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13308 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13309 (mac, ip, private.filled[constants.NIC_MODE],
13310 private.filled[constants.NIC_LINK])),
13314 def _ApplyNicMods(idx, nic, params, private):
13315 """Modifies a network interface.
13320 for key in [constants.INIC_MAC, constants.INIC_IP]:
13322 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13323 setattr(nic, key, params[key])
13326 nic.nicparams = private.params
13328 for (key, val) in params.items():
13329 changes.append(("nic.%s/%d" % (key, idx), val))
13333 def Exec(self, feedback_fn):
13334 """Modifies an instance.
13336 Most parameters take effect only at the next restart of the instance.
13339 # Process here the warnings from CheckPrereq, as we don't have a
13340 # feedback_fn there.
13341 # TODO: Replace with self.LogWarning
13342 for warn in self.warn:
13343 feedback_fn("WARNING: %s" % warn)
13345 assert ((self.op.disk_template is None) ^
13346 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13347 "Not owning any node resource locks"
13350 instance = self.instance
13353 if self.op.runtime_mem:
13354 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13356 self.op.runtime_mem)
13357 rpcres.Raise("Cannot modify instance runtime memory")
13358 result.append(("runtime_memory", self.op.runtime_mem))
13360 # Apply disk changes
13361 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13362 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13363 _UpdateIvNames(0, instance.disks)
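# Renumber the disk/N iv_names so they stay consecutive after removals.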
13365 if self.op.disk_template:
13367 check_nodes = set(instance.all_nodes)
13368 if self.op.remote_node:
13369 check_nodes.add(self.op.remote_node)
13370 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13371 owned = self.owned_locks(level)
13372 assert not (check_nodes - owned), \
13373 ("Not owning the correct locks, owning %r, expected at least %r" %
13374 (owned, check_nodes))
13376 r_shut = _ShutdownInstanceDisks(self, instance)
13378 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13379 " proceed with disk template conversion")
13380 mode = (instance.disk_template, self.op.disk_template)
13382 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13384 self.cfg.ReleaseDRBDMinors(instance.name)
13386 result.append(("disk_template", self.op.disk_template))
13388 assert instance.disk_template == self.op.disk_template, \
13389 ("Expected disk template '%s', found '%s'" %
13390 (self.op.disk_template, instance.disk_template))
13392 # Release node and resource locks if there are any (they might already have
13393 # been released during disk conversion)
13394 _ReleaseLocks(self, locking.LEVEL_NODE)
13395 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13397 # Apply NIC changes
13398 if self._new_nics is not None:
13399 instance.nics = self._new_nics
13400 result.extend(self._nic_chgdesc)
13403 if self.op.hvparams:
13404 instance.hvparams = self.hv_inst
13405 for key, val in self.op.hvparams.iteritems():
13406 result.append(("hv/%s" % key, val))
13409 if self.op.beparams:
13410 instance.beparams = self.be_inst
13411 for key, val in self.op.beparams.iteritems():
13412 result.append(("be/%s" % key, val))
13415 if self.op.os_name:
13416 instance.os = self.op.os_name
13419 if self.op.osparams:
13420 instance.osparams = self.os_inst
13421 for key, val in self.op.osparams.iteritems():
13422 result.append(("os/%s" % key, val))
13424 if self.op.offline is None:
13427 elif self.op.offline:
13428 # Mark instance as offline
13429 self.cfg.MarkInstanceOffline(instance.name)
13430 result.append(("admin_state", constants.ADMINST_OFFLINE))
13432 # Mark instance as online, but stopped
13433 self.cfg.MarkInstanceDown(instance.name)
13434 result.append(("admin_state", constants.ADMINST_DOWN))
13436 self.cfg.Update(instance, feedback_fn)
13438 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13439 self.owned_locks(locking.LEVEL_NODE)), \
13440 "All node locks should have been released by now"
13444 _DISK_CONVERSIONS = {
13445 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13446 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13450 class LUInstanceChangeGroup(LogicalUnit):
13451 HPATH = "instance-change-group"
13452 HTYPE = constants.HTYPE_INSTANCE
13455 def ExpandNames(self):
13456 self.share_locks = _ShareAll()
13457 self.needed_locks = {
13458 locking.LEVEL_NODEGROUP: [],
13459 locking.LEVEL_NODE: [],
13462 self._ExpandAndLockInstance()
13464 if self.op.target_groups:
13465 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13466 self.op.target_groups)
13468 self.req_target_uuids = None
13470 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13472 def DeclareLocks(self, level):
13473 if level == locking.LEVEL_NODEGROUP:
13474 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13476 if self.req_target_uuids:
13477 lock_groups = set(self.req_target_uuids)
13479 # Lock all groups used by instance optimistically; this requires going
13480 # via the node before it's locked, requiring verification later on
13481 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13482 lock_groups.update(instance_groups)
13484 # No target groups, need to lock all of them
13485 lock_groups = locking.ALL_SET
13487 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13489 elif level == locking.LEVEL_NODE:
13490 if self.req_target_uuids:
13491 # Lock all nodes used by instances
13492 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13493 self._LockInstancesNodes()
13495 # Lock all nodes in all potential target groups
13496 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13497 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13498 member_nodes = [node_name
13499 for group in lock_groups
13500 for node_name in self.cfg.GetNodeGroup(group).members]
13501 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13503 # Lock all nodes as all groups are potential targets
13504 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13506 def CheckPrereq(self):
13507 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13508 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13509 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13511 assert (self.req_target_uuids is None or
13512 owned_groups.issuperset(self.req_target_uuids))
13513 assert owned_instances == set([self.op.instance_name])
13515 # Get instance information
13516 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13518 # Check if node groups for locked instance are still correct
13519 assert owned_nodes.issuperset(self.instance.all_nodes), \
13520 ("Instance %s's nodes changed while we kept the lock" %
13521 self.op.instance_name)
13523 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13526 if self.req_target_uuids:
13527 # User requested specific target groups
13528 self.target_uuids = frozenset(self.req_target_uuids)
13530 # All groups except those used by the instance are potential targets
13531 self.target_uuids = owned_groups - inst_groups
13533 conflicting_groups = self.target_uuids & inst_groups
13534 if conflicting_groups:
13535 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13536 " used by the instance '%s'" %
13537 (utils.CommaJoin(conflicting_groups),
13538 self.op.instance_name),
13539 errors.ECODE_INVAL)
13541 if not self.target_uuids:
13542 raise errors.OpPrereqError("There are no possible target groups",
13543 errors.ECODE_INVAL)
13545 def BuildHooksEnv(self):
13546 """Build hooks env.
13549 assert self.target_uuids
13552 "TARGET_GROUPS": " ".join(self.target_uuids),
13555 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13559 def BuildHooksNodes(self):
13560 """Build hooks nodes.
13563 mn = self.cfg.GetMasterNode()
13564 return ([mn], [mn])
13566 def Exec(self, feedback_fn):
13567 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13569 assert instances == [self.op.instance_name], "Instance not locked"
13571 req = iallocator.IAReqGroupChange(instances=instances,
13572 target_groups=list(self.target_uuids))
13573 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13575 ial.Run(self.op.iallocator)
13577 if not ial.success:
13578 raise errors.OpPrereqError("Can't compute solution for changing group of"
13579 " instance '%s' using iallocator '%s': %s" %
13580 (self.op.instance_name, self.op.iallocator,
13581 ial.info), errors.ECODE_NORES)
13583 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13585 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13586 " instance '%s'", len(jobs), self.op.instance_name)
13588 return ResultWithJobs(jobs)
13591 class LUBackupQuery(NoHooksLU):
13592 """Query the exports list
13597 def CheckArguments(self):
13598 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13599 ["node", "export"], self.op.use_locking)
13601 def ExpandNames(self):
13602 self.expq.ExpandNames(self)
13604 def DeclareLocks(self, level):
13605 self.expq.DeclareLocks(self, level)
13607 def Exec(self, feedback_fn):
13610 for (node, expname) in self.expq.OldStyleQuery(self):
13611 if expname is None:
13612 result[node] = False
13614 result.setdefault(node, []).append(expname)
13619 class _ExportQuery(_QueryBase):
13620 FIELDS = query.EXPORT_FIELDS
13622 #: The node name is not a unique key for this query
13623 SORT_FIELD = "node"
13625 def ExpandNames(self, lu):
13626 lu.needed_locks = {}
13628 # The following variables interact with _QueryBase._GetNames
13630 self.wanted = _GetWantedNodes(lu, self.names)
13632 self.wanted = locking.ALL_SET
13634 self.do_locking = self.use_locking
13636 if self.do_locking:
13637 lu.share_locks = _ShareAll()
13638 lu.needed_locks = {
13639 locking.LEVEL_NODE: self.wanted,
13642 def DeclareLocks(self, lu, level):
13645 def _GetQueryData(self, lu):
13646 """Computes the list of nodes and their attributes.
13649 # Locking is not used
13651 assert not (compat.any(lu.glm.is_owned(level)
13652 for level in locking.LEVELS
13653 if level != locking.LEVEL_CLUSTER) or
13654 self.do_locking or self.use_locking)
13656 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
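# Nodes that fail the export-list RPC are reported with a None entry so the
# caller can distinguish "no exports" from "node unreachable".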
13660 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13662 result.append((node, None))
13664 result.extend((node, expname) for expname in nres.payload)
13669 class LUBackupPrepare(NoHooksLU):
13670 """Prepares an instance for an export and returns useful information.
13675 def ExpandNames(self):
13676 self._ExpandAndLockInstance()
13678 def CheckPrereq(self):
13679 """Check prerequisites.
13682 instance_name = self.op.instance_name
13684 self.instance = self.cfg.GetInstanceInfo(instance_name)
13685 assert self.instance is not None, \
13686 "Cannot retrieve locked instance %s" % self.op.instance_name
13687 _CheckNodeOnline(self, self.instance.primary_node)
13689 self._cds = _GetClusterDomainSecret()
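# The cluster domain secret is used below to HMAC-sign the X509 key name,
# sign the temporary CA and compute the remote-export handshake.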
13691 def Exec(self, feedback_fn):
13692 """Prepares an instance for an export.
13695 instance = self.instance
13697 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13698 salt = utils.GenerateSecret(8)
13700 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13701 result = self.rpc.call_x509_cert_create(instance.primary_node,
13702 constants.RIE_CERT_VALIDITY)
13703 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13705 (name, cert_pem) = result.payload
13707 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13711 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13712 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13714 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13720 class LUBackupExport(LogicalUnit):
13721 """Export an instance to an image in the cluster.
13724 HPATH = "instance-export"
13725 HTYPE = constants.HTYPE_INSTANCE
13728 def CheckArguments(self):
13729 """Check the arguments.
13732 self.x509_key_name = self.op.x509_key_name
13733 self.dest_x509_ca_pem = self.op.destination_x509_ca
13735 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13736 if not self.x509_key_name:
13737 raise errors.OpPrereqError("Missing X509 key name for encryption",
13738 errors.ECODE_INVAL)
13740 if not self.dest_x509_ca_pem:
13741 raise errors.OpPrereqError("Missing destination X509 CA",
13742 errors.ECODE_INVAL)
13744 def ExpandNames(self):
13745 self._ExpandAndLockInstance()
13747 # Lock all nodes for local exports
13748 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13749 # FIXME: lock only instance primary and destination node
13751 # Sad but true, for now we have to lock all nodes, as we don't know where
13752 # the previous export might be, and in this LU we search for it and
13753 # remove it from its current node. In the future we could fix this by:
13754 # - making a tasklet to search (share-lock all), then create the
13755 # new one, then one to remove, after
13756 # - removing the removal operation altogether
13757 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13759 def DeclareLocks(self, level):
13760 """Last minute lock declaration."""
13761 # All nodes are locked anyway, so nothing to do here.
13763 def BuildHooksEnv(self):
13764 """Build hooks env.
13766 This will run on the master, primary node and target node.
13770 "EXPORT_MODE": self.op.mode,
13771 "EXPORT_NODE": self.op.target_node,
13772 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13773 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13774 # TODO: Generic function for boolean env variables
13775 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13778 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13782 def BuildHooksNodes(self):
13783 """Build hooks nodes.
13786 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13788 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13789 nl.append(self.op.target_node)
13793 def CheckPrereq(self):
13794 """Check prerequisites.
13796 This checks that the instance and node names are valid.
13799 instance_name = self.op.instance_name
13801 self.instance = self.cfg.GetInstanceInfo(instance_name)
13802 assert self.instance is not None, \
13803 "Cannot retrieve locked instance %s" % self.op.instance_name
13804 _CheckNodeOnline(self, self.instance.primary_node)
13806 if (self.op.remove_instance and
13807 self.instance.admin_state == constants.ADMINST_UP and
13808 not self.op.shutdown):
13809 raise errors.OpPrereqError("Can not remove instance without shutting it"
13810 " down before", errors.ECODE_STATE)
13812 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13813 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13814 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13815 assert self.dst_node is not None
13817 _CheckNodeOnline(self, self.dst_node.name)
13818 _CheckNodeNotDrained(self, self.dst_node.name)
13821 self.dest_disk_info = None
13822 self.dest_x509_ca = None
13824 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13825 self.dst_node = None
13827 if len(self.op.target_node) != len(self.instance.disks):
13828 raise errors.OpPrereqError(("Received destination information for %s"
13829 " disks, but instance %s has %s disks") %
13830 (len(self.op.target_node), instance_name,
13831 len(self.instance.disks)),
13832 errors.ECODE_INVAL)
13834 cds = _GetClusterDomainSecret()
13836 # Check X509 key name
13838 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13839 except (TypeError, ValueError), err:
13840 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13841 errors.ECODE_INVAL)
13843 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13844 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13845 errors.ECODE_INVAL)
13847 # Load and verify CA
13849 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13850 except OpenSSL.crypto.Error, err:
13851 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13852 (err, ), errors.ECODE_INVAL)
13854 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13855 if errcode is not None:
13856 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13857 (msg, ), errors.ECODE_INVAL)
13859 self.dest_x509_ca = cert
13861 # Verify target information
13863 for idx, disk_data in enumerate(self.op.target_node):
13865 (host, port, magic) = \
13866 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13867 except errors.GenericError, err:
13868 raise errors.OpPrereqError("Target info for disk %s: %s" %
13869 (idx, err), errors.ECODE_INVAL)
13871 disk_info.append((host, port, magic))
13873 assert len(disk_info) == len(self.op.target_node)
13874 self.dest_disk_info = disk_info
13877 raise errors.ProgrammerError("Unhandled export mode %r" %
13880 # instance disk type verification
13881 # TODO: Implement export support for file-based disks
13882 for disk in self.instance.disks:
13883 if disk.dev_type == constants.LD_FILE:
13884 raise errors.OpPrereqError("Export not supported for instances with"
13885 " file-based disks", errors.ECODE_INVAL)
13887 def _CleanupExports(self, feedback_fn):
13888 """Removes exports of current instance from all other nodes.
13890 If an instance in a cluster with nodes A..D was exported to node C, its
13891 exports will be removed from the nodes A, B and D.
13894 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13896 nodelist = self.cfg.GetNodeList()
13897 nodelist.remove(self.dst_node.name)
13899 # on one-node clusters nodelist will be empty after the removal;
13900 # if we proceeded, the backup would be removed because OpBackupQuery
13901 # substitutes an empty list with the full cluster node list.
13902 iname = self.instance.name
13904 feedback_fn("Removing old exports for instance %s" % iname)
13905 exportlist = self.rpc.call_export_list(nodelist)
13906 for node in exportlist:
13907 if exportlist[node].fail_msg:
13909 if iname in exportlist[node].payload:
13910 msg = self.rpc.call_export_remove(node, iname).fail_msg
13912 self.LogWarning("Could not remove older export for instance %s"
13913 " on node %s: %s", iname, node, msg)
13915 def Exec(self, feedback_fn):
13916 """Export an instance to an image in the cluster.
13919 assert self.op.mode in constants.EXPORT_MODES
13921 instance = self.instance
13922 src_node = instance.primary_node
13924 if self.op.shutdown:
13925 # shutdown the instance, but not the disks
13926 feedback_fn("Shutting down instance %s" % instance.name)
13927 result = self.rpc.call_instance_shutdown(src_node, instance,
13928 self.op.shutdown_timeout)
13929 # TODO: Maybe ignore failures if ignore_remove_failures is set
13930 result.Raise("Could not shutdown instance %s on"
13931 " node %s" % (instance.name, src_node))
13933 # set the disks ID correctly since call_instance_start needs the
13934 # correct drbd minor to create the symlinks
13935 for disk in instance.disks:
13936 self.cfg.SetDiskID(disk, src_node)
13938 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13941 # Activate the instance disks if we're exporting a stopped instance
13942 feedback_fn("Activating disks for %s" % instance.name)
13943 _StartInstanceDisks(self, instance, None)
13946 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13949 helper.CreateSnapshots()
13951 if (self.op.shutdown and
13952 instance.admin_state == constants.ADMINST_UP and
13953 not self.op.remove_instance):
13954 assert not activate_disks
13955 feedback_fn("Starting instance %s" % instance.name)
13956 result = self.rpc.call_instance_start(src_node,
13957 (instance, None, None), False)
13958 msg = result.fail_msg
13960 feedback_fn("Failed to start instance: %s" % msg)
13961 _ShutdownInstanceDisks(self, instance)
13962 raise errors.OpExecError("Could not start instance: %s" % msg)
13964 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13965 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13966 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13967 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13968 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13970 (key_name, _, _) = self.x509_key_name
13973 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13976 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13977 key_name, dest_ca_pem,
13982 # Check for backwards compatibility
13983 assert len(dresults) == len(instance.disks)
13984 assert compat.all(isinstance(i, bool) for i in dresults), \
13985 "Not all results are boolean: %r" % dresults
13989 feedback_fn("Deactivating disks for %s" % instance.name)
13990 _ShutdownInstanceDisks(self, instance)
13992 if not (compat.all(dresults) and fin_resu):
13995 failures.append("export finalization")
13996 if not compat.all(dresults):
13997 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13999 failures.append("disk export: disk(s) %s" % fdsk)
14001 raise errors.OpExecError("Export failed, errors in %s" %
14002 utils.CommaJoin(failures))
14004 # At this point, the export was successful, we can cleanup/finish
14006 # Remove instance if requested
14007 if self.op.remove_instance:
14008 feedback_fn("Removing instance %s" % instance.name)
14009 _RemoveInstance(self, feedback_fn, instance,
14010 self.op.ignore_remove_failures)
14012 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14013 self._CleanupExports(feedback_fn)
14015 return fin_resu, dresults
14018 class LUBackupRemove(NoHooksLU):
14019 """Remove exports related to the named instance.
14024 def ExpandNames(self):
14025 self.needed_locks = {}
14026 # We need all nodes to be locked in order for RemoveExport to work, but we
14027 # don't need to lock the instance itself, as nothing will happen to it (and
14028 # we can remove exports also for a removed instance)
14029 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14031 def Exec(self, feedback_fn):
14032 """Remove any export.
14035 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14036 # If the instance was not found we'll try with the name that was passed in.
14037 # This will only work if it was an FQDN, though.
14039 if not instance_name:
14041 instance_name = self.op.instance_name
14043 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14044 exportlist = self.rpc.call_export_list(locked_nodes)
14046 for node in exportlist:
14047 msg = exportlist[node].fail_msg
14049 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14051 if instance_name in exportlist[node].payload:
14053 result = self.rpc.call_export_remove(node, instance_name)
14054 msg = result.fail_msg
14056 logging.error("Could not remove export for instance %s"
14057 " on node %s: %s", instance_name, node, msg)
14059 if fqdn_warn and not found:
14060 feedback_fn("Export not found. If trying to remove an export belonging"
14061 " to a deleted instance please use its Fully Qualified"
14065 class LUGroupAdd(LogicalUnit):
14066 """Logical unit for creating node groups.
14069 HPATH = "group-add"
14070 HTYPE = constants.HTYPE_GROUP
14073 def ExpandNames(self):
14074 # We need the new group's UUID here so that we can create and acquire the
14075 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14076 # that it should not check whether the UUID exists in the configuration.
14077 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14078 self.needed_locks = {}
14079 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14081 def CheckPrereq(self):
14082 """Check prerequisites.
14084 This checks that the given group name is not an existing node group
14089 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14090 except errors.OpPrereqError:
14093 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14094 " node group (UUID: %s)" %
14095 (self.op.group_name, existing_uuid),
14096 errors.ECODE_EXISTS)
14098 if self.op.ndparams:
14099 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14101 if self.op.hv_state:
14102 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14104 self.new_hv_state = None
14106 if self.op.disk_state:
14107 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14109 self.new_disk_state = None
14111 if self.op.diskparams:
14112 for templ in constants.DISK_TEMPLATES:
14113 if templ in self.op.diskparams:
14114 utils.ForceDictType(self.op.diskparams[templ],
14115 constants.DISK_DT_TYPES)
14116 self.new_diskparams = self.op.diskparams
14118 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14119 except errors.OpPrereqError, err:
14120 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14121 errors.ECODE_INVAL)
14123 self.new_diskparams = {}
14125 if self.op.ipolicy:
14126 cluster = self.cfg.GetClusterInfo()
14127 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14129 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14130 except errors.ConfigurationError, err:
14131 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14132 errors.ECODE_INVAL)
14134 def BuildHooksEnv(self):
14135 """Build hooks env.
14139 "GROUP_NAME": self.op.group_name,
14142 def BuildHooksNodes(self):
14143 """Build hooks nodes.
14146 mn = self.cfg.GetMasterNode()
14147 return ([mn], [mn])
14149 def Exec(self, feedback_fn):
14150 """Add the node group to the cluster.
14153 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14154 uuid=self.group_uuid,
14155 alloc_policy=self.op.alloc_policy,
14156 ndparams=self.op.ndparams,
14157 diskparams=self.new_diskparams,
14158 ipolicy=self.op.ipolicy,
14159 hv_state_static=self.new_hv_state,
14160 disk_state_static=self.new_disk_state)
14162 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14163 del self.remove_locks[locking.LEVEL_NODEGROUP]
14166 class LUGroupAssignNodes(NoHooksLU):
14167 """Logical unit for assigning nodes to groups.
14172 def ExpandNames(self):
14173 # These raise errors.OpPrereqError on their own:
14174 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14175 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14177 # We want to lock all the affected nodes and groups. We have readily
14178 # available the list of nodes, and the *destination* group. To gather the
14179 # list of "source" groups, we need to fetch node information later on.
14180 self.needed_locks = {
14181 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14182 locking.LEVEL_NODE: self.op.nodes,
14185 def DeclareLocks(self, level):
14186 if level == locking.LEVEL_NODEGROUP:
14187 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14189 # Try to get all affected nodes' groups without having the group or node
14190 # lock yet. Needs verification later in the code flow.
14191 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14193 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14195 def CheckPrereq(self):
14196 """Check prerequisites.
14199 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14200 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14201 frozenset(self.op.nodes))
14203 expected_locks = (set([self.group_uuid]) |
14204 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14205 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14206 if actual_locks != expected_locks:
14207 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14208 " current groups are '%s', used to be '%s'" %
14209 (utils.CommaJoin(expected_locks),
14210 utils.CommaJoin(actual_locks)))
14212 self.node_data = self.cfg.GetAllNodesInfo()
14213 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14214 instance_data = self.cfg.GetAllInstancesInfo()
14216 if self.group is None:
14217 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14218 (self.op.group_name, self.group_uuid))
14220 (new_splits, previous_splits) = \
14221 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14222 for node in self.op.nodes],
14223 self.node_data, instance_data)
14226 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14228 if not self.op.force:
14229 raise errors.OpExecError("The following instances get split by this"
14230 " change and --force was not given: %s" %
14233 self.LogWarning("This operation will split the following instances: %s",
14236 if previous_splits:
14237 self.LogWarning("In addition, these already-split instances continue"
14238 " to be split across groups: %s",
14239 utils.CommaJoin(utils.NiceSort(previous_splits)))
14241 def Exec(self, feedback_fn):
14242 """Assign nodes to a new group.
14245 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14247 self.cfg.AssignGroupNodes(mods)
14250 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14251 """Check for split instances after a node assignment.
14253 This method considers a series of node assignments as an atomic operation,
14254 and returns information about split instances after applying the set of
14257 In particular, it returns information about newly split instances, and
14258 instances that were already split, and remain so after the change.
14260 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14263 @type changes: list of (node_name, new_group_uuid) pairs.
14264 @param changes: list of node assignments to consider.
14265 @param node_data: a dict with data for all nodes
14266 @param instance_data: a dict with all instances to consider
14267 @rtype: a two-tuple
14268 @return: a list of instances that were previously okay and end up split as a
14269 consequence of this change, and a list of instances that were previously
14270 split and this change does not fix.
14273 changed_nodes = dict((node, group) for node, group in changes
14274 if node_data[node].group != group)
14276 all_split_instances = set()
14277 previously_split_instances = set()
14279 def InstanceNodes(instance):
14280 return [instance.primary_node] + list(instance.secondary_nodes)
14282 for inst in instance_data.values():
14283 if inst.disk_template not in constants.DTS_INT_MIRROR:
14286 instance_nodes = InstanceNodes(inst)
14288 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14289 previously_split_instances.add(inst.name)
14291 if len(set(changed_nodes.get(node, node_data[node].group)
14292 for node in instance_nodes)) > 1:
14293 all_split_instances.add(inst.name)
14295 return (list(all_split_instances - previously_split_instances),
14296 list(previously_split_instances & all_split_instances))
14299 class _GroupQuery(_QueryBase):
14300 FIELDS = query.GROUP_FIELDS
14302 def ExpandNames(self, lu):
14303 lu.needed_locks = {}
14305 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14306 self._cluster = lu.cfg.GetClusterInfo()
14307 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14310 self.wanted = [name_to_uuid[name]
14311 for name in utils.NiceSort(name_to_uuid.keys())]
14313 # Accept names to be either names or UUIDs.
14316 all_uuid = frozenset(self._all_groups.keys())
14318 for name in self.names:
14319 if name in all_uuid:
14320 self.wanted.append(name)
14321 elif name in name_to_uuid:
14322 self.wanted.append(name_to_uuid[name])
14324 missing.append(name)
14327 raise errors.OpPrereqError("Some groups do not exist: %s" %
14328 utils.CommaJoin(missing),
14329 errors.ECODE_NOENT)
14331 def DeclareLocks(self, lu, level):
14334 def _GetQueryData(self, lu):
14335 """Computes the list of node groups and their attributes.
14338 do_nodes = query.GQ_NODE in self.requested_data
14339 do_instances = query.GQ_INST in self.requested_data
14341 group_to_nodes = None
14342 group_to_instances = None
14344 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14345 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14346 # latter GetAllInstancesInfo() is not enough, for we have to go through
14347 # instance->node. Hence, we will need to process nodes even if we only need
14348 # instance information.
14349 if do_nodes or do_instances:
14350 all_nodes = lu.cfg.GetAllNodesInfo()
14351 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14354 for node in all_nodes.values():
14355 if node.group in group_to_nodes:
14356 group_to_nodes[node.group].append(node.name)
14357 node_to_group[node.name] = node.group
14360 all_instances = lu.cfg.GetAllInstancesInfo()
14361 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14363 for instance in all_instances.values():
14364 node = instance.primary_node
14365 if node in node_to_group:
14366 group_to_instances[node_to_group[node]].append(instance.name)
14369 # Do not pass on node information if it was not requested.
14370 group_to_nodes = None
14372 return query.GroupQueryData(self._cluster,
14373 [self._all_groups[uuid]
14374 for uuid in self.wanted],
14375 group_to_nodes, group_to_instances,
14376 query.GQ_DISKPARAMS in self.requested_data)
14379 class LUGroupQuery(NoHooksLU):
14380 """Logical unit for querying node groups.
14385 def CheckArguments(self):
14386 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14387 self.op.output_fields, False)
14389 def ExpandNames(self):
14390 self.gq.ExpandNames(self)
14392 def DeclareLocks(self, level):
14393 self.gq.DeclareLocks(self, level)
14395 def Exec(self, feedback_fn):
14396 return self.gq.OldStyleQuery(self)
14399 class LUGroupSetParams(LogicalUnit):
14400 """Modifies the parameters of a node group.
14403 HPATH = "group-modify"
14404 HTYPE = constants.HTYPE_GROUP
14407 def CheckArguments(self):
14410 self.op.diskparams,
14411 self.op.alloc_policy,
14413 self.op.disk_state,
14417 if all_changes.count(None) == len(all_changes):
14418 raise errors.OpPrereqError("Please pass at least one modification",
14419 errors.ECODE_INVAL)
14421 def ExpandNames(self):
14422 # This raises errors.OpPrereqError on its own:
14423 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14425 self.needed_locks = {
14426 locking.LEVEL_INSTANCE: [],
14427 locking.LEVEL_NODEGROUP: [self.group_uuid],
14430 self.share_locks[locking.LEVEL_INSTANCE] = 1
14432 def DeclareLocks(self, level):
14433 if level == locking.LEVEL_INSTANCE:
14434 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14436 # Lock instances optimistically; this needs verification once the group lock has been acquired
14438 self.needed_locks[locking.LEVEL_INSTANCE] = \
14439 self.cfg.GetNodeGroupInstances(self.group_uuid)
14442 def _UpdateAndVerifyDiskParams(old, new):
14443 """Updates and verifies disk parameters.
14446 new_params = _GetUpdatedParams(old, new)
14447 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14450 def CheckPrereq(self):
14451 """Check prerequisites.
14454 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14456 # Check if locked instances are still correct
14457 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14459 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14460 cluster = self.cfg.GetClusterInfo()
14462 if self.group is None:
14463 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14464 (self.op.group_name, self.group_uuid))
14466 if self.op.ndparams:
14467 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14468 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14469 self.new_ndparams = new_ndparams
14471 if self.op.diskparams:
14472 diskparams = self.group.diskparams
14473 uavdp = self._UpdateAndVerifyDiskParams
14474 # For each disktemplate subdict update and verify the values
14475 new_diskparams = dict((dt,
14476 uavdp(diskparams.get(dt, {}),
14477 self.op.diskparams[dt]))
14478 for dt in constants.DISK_TEMPLATES
14479 if dt in self.op.diskparams)
14480 # Now that all subdicts of diskparams are ready, let's merge the actual
14481 # dict with all updated subdicts
14482 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14484 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14485 except errors.OpPrereqError, err:
14486 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14487 errors.ECODE_INVAL)
14489 if self.op.hv_state:
14490 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14491 self.group.hv_state_static)
14493 if self.op.disk_state:
14494 self.new_disk_state = \
14495 _MergeAndVerifyDiskState(self.op.disk_state,
14496 self.group.disk_state_static)
14498 if self.op.ipolicy:
14499       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14500                                             self.op.ipolicy,
14501                                             group_policy=True)
14503       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14504       inst_filter = lambda inst: inst.name in owned_instances
14505       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14506       gmi = ganeti.masterd.instance
14507       violations = \
14508           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14509                                                                    self.group),
14510                                         new_ipolicy, instances)
14512       if violations:
14513         self.LogWarning("After the ipolicy change the following instances"
14514 " violate them: %s",
14515 utils.CommaJoin(violations))
14517 def BuildHooksEnv(self):
14518 """Build hooks env.
14522 "GROUP_NAME": self.op.group_name,
14523 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14526 def BuildHooksNodes(self):
14527 """Build hooks nodes.
14530 mn = self.cfg.GetMasterNode()
14531 return ([mn], [mn])
14533 def Exec(self, feedback_fn):
14534 """Modifies the node group.
14539 if self.op.ndparams:
14540 self.group.ndparams = self.new_ndparams
14541 result.append(("ndparams", str(self.group.ndparams)))
14543 if self.op.diskparams:
14544 self.group.diskparams = self.new_diskparams
14545 result.append(("diskparams", str(self.group.diskparams)))
14547 if self.op.alloc_policy:
14548 self.group.alloc_policy = self.op.alloc_policy
14550 if self.op.hv_state:
14551 self.group.hv_state_static = self.new_hv_state
14553 if self.op.disk_state:
14554 self.group.disk_state_static = self.new_disk_state
14556 if self.op.ipolicy:
14557 self.group.ipolicy = self.new_ipolicy
14559     self.cfg.Update(self.group, feedback_fn)
14561     return result
14563 class LUGroupRemove(LogicalUnit):
14564 HPATH = "group-remove"
14565   HTYPE = constants.HTYPE_GROUP
14566   REQ_BGL = False
14568 def ExpandNames(self):
14569     # This will raise errors.OpPrereqError on its own:
14570 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14571 self.needed_locks = {
14572       locking.LEVEL_NODEGROUP: [self.group_uuid],
14573       }
14575 def CheckPrereq(self):
14576 """Check prerequisites.
14578 This checks that the given group name exists as a node group, that is
14579 empty (i.e., contains no nodes), and that is not the last group of the
14583 # Verify that the group is empty.
14584 group_nodes = [node.name
14585 for node in self.cfg.GetAllNodesInfo().values()
14586 if node.group == self.group_uuid]
14589 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14591 (self.op.group_name,
14592 utils.CommaJoin(utils.NiceSort(group_nodes))),
14593 errors.ECODE_STATE)
14595 # Verify the cluster would not be left group-less.
14596 if len(self.cfg.GetNodeGroupList()) == 1:
14597 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14598 " removed" % self.op.group_name,
14599 errors.ECODE_STATE)
14601 def BuildHooksEnv(self):
14602 """Build hooks env.
14606 "GROUP_NAME": self.op.group_name,
14609 def BuildHooksNodes(self):
14610 """Build hooks nodes.
14613 mn = self.cfg.GetMasterNode()
14614 return ([mn], [mn])
14616 def Exec(self, feedback_fn):
14617 """Remove the node group.
14621 self.cfg.RemoveNodeGroup(self.group_uuid)
14622 except errors.ConfigurationError:
14623 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14624 (self.op.group_name, self.group_uuid))
14626 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14629 class LUGroupRename(LogicalUnit):
14630 HPATH = "group-rename"
14631   HTYPE = constants.HTYPE_GROUP
14632   REQ_BGL = False
14634 def ExpandNames(self):
14635 # This raises errors.OpPrereqError on its own:
14636 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14638 self.needed_locks = {
14639       locking.LEVEL_NODEGROUP: [self.group_uuid],
14640       }
14642 def CheckPrereq(self):
14643 """Check prerequisites.
14645 Ensures requested new name is not yet used.
14649 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14650 except errors.OpPrereqError:
14653 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14654 " node group (UUID: %s)" %
14655 (self.op.new_name, new_name_uuid),
14656 errors.ECODE_EXISTS)
14658 def BuildHooksEnv(self):
14659 """Build hooks env.
14663 "OLD_NAME": self.op.group_name,
14664 "NEW_NAME": self.op.new_name,
14667 def BuildHooksNodes(self):
14668 """Build hooks nodes.
14671 mn = self.cfg.GetMasterNode()
14673 all_nodes = self.cfg.GetAllNodesInfo()
14674 all_nodes.pop(mn, None)
14677 run_nodes.extend(node.name for node in all_nodes.values()
14678 if node.group == self.group_uuid)
14680 return (run_nodes, run_nodes)
14682 def Exec(self, feedback_fn):
14683 """Rename the node group.
14686 group = self.cfg.GetNodeGroup(self.group_uuid)
14689 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14690 (self.op.group_name, self.group_uuid))
14692 group.name = self.op.new_name
14693 self.cfg.Update(group, feedback_fn)
14695 return self.op.new_name
14698 class LUGroupEvacuate(LogicalUnit):
14699 HPATH = "group-evacuate"
14700   HTYPE = constants.HTYPE_GROUP
14701   REQ_BGL = False
14703 def ExpandNames(self):
14704 # This raises errors.OpPrereqError on its own:
14705 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14707 if self.op.target_groups:
14708 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14709 self.op.target_groups)
14710     else:
14711       self.req_target_uuids = []
14713 if self.group_uuid in self.req_target_uuids:
14714 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14715 " as a target group (targets are %s)" %
14716                                  (self.group_uuid,
14717                                   utils.CommaJoin(self.req_target_uuids)),
14718 errors.ECODE_INVAL)
14720 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14722 self.share_locks = _ShareAll()
14723 self.needed_locks = {
14724 locking.LEVEL_INSTANCE: [],
14725 locking.LEVEL_NODEGROUP: [],
14726       locking.LEVEL_NODE: [],
14727       }
14729 def DeclareLocks(self, level):
14730 if level == locking.LEVEL_INSTANCE:
14731 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14733 # Lock instances optimistically, needs verification once node and group
14734 # locks have been acquired
14735 self.needed_locks[locking.LEVEL_INSTANCE] = \
14736 self.cfg.GetNodeGroupInstances(self.group_uuid)
14738 elif level == locking.LEVEL_NODEGROUP:
14739 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14741 if self.req_target_uuids:
14742 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14744 # Lock all groups used by instances optimistically; this requires going
14745 # via the node before it's locked, requiring verification later on
14746 lock_groups.update(group_uuid
14747 for instance_name in
14748 self.owned_locks(locking.LEVEL_INSTANCE)
14749                            for group_uuid in
14750                              self.cfg.GetInstanceNodeGroups(instance_name))
14751       else:
14752         # No target groups, need to lock all of them
14753 lock_groups = locking.ALL_SET
14755 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14757 elif level == locking.LEVEL_NODE:
14758 # This will only lock the nodes in the group to be evacuated which
14759 # contain actual instances
14760 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14761 self._LockInstancesNodes()
14763 # Lock all nodes in group to be evacuated and target groups
14764 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14765 assert self.group_uuid in owned_groups
14766 member_nodes = [node_name
14767 for group in owned_groups
14768 for node_name in self.cfg.GetNodeGroup(group).members]
14769 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14771 def CheckPrereq(self):
14772 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14773 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14774 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14776 assert owned_groups.issuperset(self.req_target_uuids)
14777 assert self.group_uuid in owned_groups
14779 # Check if locked instances are still correct
14780 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14782 # Get instance information
14783 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14785 # Check if node groups for locked instances are still correct
14786 _CheckInstancesNodeGroups(self.cfg, self.instances,
14787 owned_groups, owned_nodes, self.group_uuid)
14789 if self.req_target_uuids:
14790 # User requested specific target groups
14791 self.target_uuids = self.req_target_uuids
14792     else:
14793       # All groups except the one to be evacuated are potential targets
14794 self.target_uuids = [group_uuid for group_uuid in owned_groups
14795 if group_uuid != self.group_uuid]
14797 if not self.target_uuids:
14798 raise errors.OpPrereqError("There are no possible target groups",
14799 errors.ECODE_INVAL)
14801 def BuildHooksEnv(self):
14802 """Build hooks env.
14806 "GROUP_NAME": self.op.group_name,
14807 "TARGET_GROUPS": " ".join(self.target_uuids),
14810 def BuildHooksNodes(self):
14811 """Build hooks nodes.
14814 mn = self.cfg.GetMasterNode()
14816 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14818 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14820 return (run_nodes, run_nodes)
14822 def Exec(self, feedback_fn):
14823 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14825 assert self.group_uuid not in self.target_uuids
14827 req = iallocator.IAReqGroupChange(instances=instances,
14828 target_groups=self.target_uuids)
14829 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14831 ial.Run(self.op.iallocator)
14833 if not ial.success:
14834 raise errors.OpPrereqError("Can't compute group evacuation using"
14835 " iallocator '%s': %s" %
14836 (self.op.iallocator, ial.info),
14837 errors.ECODE_NORES)
14839 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14841 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14842 len(jobs), self.op.group_name)
14844 return ResultWithJobs(jobs)
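# Editor's illustrative note (added): _LoadNodeEvacResult converts the
# iallocator response into a list of jobs, each job being a list of opcodes,
# which is the shape ResultWithJobs expects. Roughly (the opcodes shown are
# examples only, not taken from a real iallocator run):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com")],
#     ]
#   return ResultWithJobs(jobs)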
14847 class TagsLU(NoHooksLU): # pylint: disable=W0223
14848 """Generic tags LU.
14850   This is an abstract class which is the parent of all the other tags LUs.
14852   """
14853 def ExpandNames(self):
14854 self.group_uuid = None
14855 self.needed_locks = {}
14857 if self.op.kind == constants.TAG_NODE:
14858 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14859 lock_level = locking.LEVEL_NODE
14860 lock_name = self.op.name
14861 elif self.op.kind == constants.TAG_INSTANCE:
14862 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14863 lock_level = locking.LEVEL_INSTANCE
14864 lock_name = self.op.name
14865 elif self.op.kind == constants.TAG_NODEGROUP:
14866 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14867 lock_level = locking.LEVEL_NODEGROUP
14868       lock_name = self.group_uuid
14869     else:
14870       lock_level = None
14871       lock_name = None
14873     if lock_level and getattr(self.op, "use_locking", True):
14874 self.needed_locks[lock_level] = lock_name
14876 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14877 # not possible to acquire the BGL based on opcode parameters)
14879 def CheckPrereq(self):
14880 """Check prerequisites.
14883 if self.op.kind == constants.TAG_CLUSTER:
14884 self.target = self.cfg.GetClusterInfo()
14885 elif self.op.kind == constants.TAG_NODE:
14886 self.target = self.cfg.GetNodeInfo(self.op.name)
14887 elif self.op.kind == constants.TAG_INSTANCE:
14888 self.target = self.cfg.GetInstanceInfo(self.op.name)
14889 elif self.op.kind == constants.TAG_NODEGROUP:
14890 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14892 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14893 str(self.op.kind), errors.ECODE_INVAL)
14896 class LUTagsGet(TagsLU):
14897 """Returns the tags of a given object.
14902 def ExpandNames(self):
14903 TagsLU.ExpandNames(self)
14905 # Share locks as this is only a read operation
14906 self.share_locks = _ShareAll()
14908 def Exec(self, feedback_fn):
14909 """Returns the tag list.
14912 return list(self.target.GetTags())
14915 class LUTagsSearch(NoHooksLU):
14916 """Searches the tags for a given pattern.
14921 def ExpandNames(self):
14922 self.needed_locks = {}
14924 def CheckPrereq(self):
14925 """Check prerequisites.
14927     This checks the pattern passed for validity by compiling it.
14929     """
14930     try:
14931       self.re = re.compile(self.op.pattern)
14932 except re.error, err:
14933 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14934 (self.op.pattern, err), errors.ECODE_INVAL)
14936 def Exec(self, feedback_fn):
14937 """Returns the tag list.
14941 tgts = [("/cluster", cfg.GetClusterInfo())]
14942 ilist = cfg.GetAllInstancesInfo().values()
14943 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14944 nlist = cfg.GetAllNodesInfo().values()
14945 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14946 tgts.extend(("/nodegroup/%s" % n.name, n)
14947 for n in cfg.GetAllNodeGroupsInfo().values())
14948     results = []
14949     for path, target in tgts:
14950 for tag in target.GetTags():
14951 if self.re.search(tag):
14952           results.append((path, tag))
14954     return results
14956 class LUTagsSet(TagsLU):
14957 """Sets a tag on a given object.
14962 def CheckPrereq(self):
14963 """Check prerequisites.
14965     This checks the type and length of the tag name and value.
14967     """
14968 TagsLU.CheckPrereq(self)
14969 for tag in self.op.tags:
14970 objects.TaggableObject.ValidateTag(tag)
14972   def Exec(self, feedback_fn):
14973     """Sets the tag.
14975     """
14976     try:
14977       for tag in self.op.tags:
14978         self.target.AddTag(tag)
14979 except errors.TagError, err:
14980 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14981 self.cfg.Update(self.target, feedback_fn)
14984 class LUTagsDel(TagsLU):
14985 """Delete a list of tags from a given object.
14990 def CheckPrereq(self):
14991 """Check prerequisites.
14993     This checks that we have the given tag.
14995     """
14996 TagsLU.CheckPrereq(self)
14997 for tag in self.op.tags:
14998 objects.TaggableObject.ValidateTag(tag)
14999 del_tags = frozenset(self.op.tags)
15000 cur_tags = self.target.GetTags()
15002 diff_tags = del_tags - cur_tags
15003     if diff_tags:
15004       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15005 raise errors.OpPrereqError("Tag(s) %s not found" %
15006 (utils.CommaJoin(diff_names), ),
15007 errors.ECODE_NOENT)
15009 def Exec(self, feedback_fn):
15010 """Remove the tag from the object.
15013 for tag in self.op.tags:
15014 self.target.RemoveTag(tag)
15015 self.cfg.Update(self.target, feedback_fn)
15018 class LUTestDelay(NoHooksLU):
15019 """Sleep for a specified amount of time.
15021   This LU sleeps on the master and/or nodes for a specified amount of
15022   time.
15024   """
15025   REQ_BGL = False
15027 def ExpandNames(self):
15028 """Expand names and set required locks.
15030     This expands the node list, if any.
15032     """
15033 self.needed_locks = {}
15034 if self.op.on_nodes:
15035 # _GetWantedNodes can be used here, but is not always appropriate to use
15036 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15037 # more information.
15038 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15039 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15041 def _TestDelay(self):
15042 """Do the actual sleep.
15045 if self.op.on_master:
15046 if not utils.TestDelay(self.op.duration):
15047 raise errors.OpExecError("Error during master delay test")
15048 if self.op.on_nodes:
15049 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15050 for node, node_result in result.items():
15051 node_result.Raise("Failure during rpc call to node %s" % node)
15053 def Exec(self, feedback_fn):
15054 """Execute the test delay opcode, with the wanted repetitions.
15057 if self.op.repeat == 0:
15060 top_value = self.op.repeat - 1
15061 for i in range(self.op.repeat):
15062 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15066 class LURestrictedCommand(NoHooksLU):
15067 """Logical unit for executing restricted commands.
15072   def ExpandNames(self):
15073     if self.op.nodes:
15074       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15076     self.needed_locks = {
15077       locking.LEVEL_NODE: self.op.nodes,
15078       }
15079     self.share_locks = {
15080       locking.LEVEL_NODE: not self.op.use_locking,
15081       }
15083 def CheckPrereq(self):
15084 """Check prerequisites.
15088 def Exec(self, feedback_fn):
15089 """Execute restricted command and return output.
15092 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15094 # Check if correct locks are held
15095 assert set(self.op.nodes).issubset(owned_nodes)
15097 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15099     result = []
15101     for node_name in self.op.nodes:
15102 nres = rpcres[node_name]
15103       if nres.fail_msg:
15104         msg = ("Command '%s' on node '%s' failed: %s" %
15105 (self.op.command, node_name, nres.fail_msg))
15106 result.append((False, msg))
15107       else:
15108         result.append((True, nres.payload))
15110     return result
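  # Editor's illustrative note (added): the list returned by Exec above pairs
  # a success flag with either the command output or an error message, one
  # entry per requested node, e.g.:
  #
  #   [(True, "output of the command on node1"),
  #    (False, "Command 'foo' on node 'node2' failed: connection timed out")]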
15113 class LUTestJqueue(NoHooksLU):
15114 """Utility LU to test some aspects of the job queue.
15119 # Must be lower than default timeout for WaitForJobChange to see whether it
15120 # notices changed jobs
15121 _CLIENT_CONNECT_TIMEOUT = 20.0
15122 _CLIENT_CONFIRM_TIMEOUT = 60.0
15124   @classmethod
15125   def _NotifyUsingSocket(cls, cb, errcls):
15126     """Opens a Unix socket and waits for another program to connect.
15128     @type cb: callable
15129     @param cb: Callback to send socket name to client
15130     @type errcls: class
15131     @param errcls: Exception class to use for errors
15133     """
15134     # Using a temporary directory as there's no easy way to create temporary
15135     # sockets without writing a custom loop around tempfile.mktemp and
15136     # socket.bind
15137     tmpdir = tempfile.mkdtemp()
15138     try:
15139       tmpsock = utils.PathJoin(tmpdir, "sock")
15141       logging.debug("Creating temporary socket at %s", tmpsock)
15142       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15143       try:
15144         sock.bind(tmpsock)
15145         sock.listen(1)
15147         # Send details to client
15148         cb(tmpsock)
15150         # Wait for client to connect before continuing
15151         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15152         try:
15153           (conn, _) = sock.accept()
15154         except socket.error, err:
15155           raise errcls("Client didn't connect in time (%s)" % err)
15156       finally:
15157         sock.close()
15158     finally:
15159       # Remove as soon as client is connected
15160       shutil.rmtree(tmpdir)
15162     # Wait for client to close
15163     try:
15164       try:
15165         # pylint: disable=E1101
15166         # Instance of '_socketobject' has no ... member
15167         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15168         conn.recv(1)
15169       except socket.error, err:
15170         raise errcls("Client failed to confirm notification (%s)" % err)
15171     finally:
15172       conn.close()
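  # Editor's illustrative sketch (added, not part of the module): the client
  # that _NotifyUsingSocket waits for only has to connect to the announced
  # path and keep the connection open until it wants to confirm, e.g.:
  #
  #   import socket
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)  # path delivered through the callback/log message
  #   ...                  # let the LU proceed, then
  #   s.close()            # closing the socket confirms the notification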
15174 def _SendNotification(self, test, arg, sockname):
15175 """Sends a notification to the client.
15178 @param test: Test name
15179 @param arg: Test argument (depends on test)
15180 @type sockname: string
15181     @param sockname: Socket path
15183     """
15184 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15186 def _Notify(self, prereq, test, arg):
15187 """Notifies the client of a test.
15190 @param prereq: Whether this is a prereq-phase test
15192 @param test: Test name
15193 @param arg: Test argument (depends on test)
15197 errcls = errors.OpPrereqError
15199 errcls = errors.OpExecError
15201 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15205 def CheckArguments(self):
15206 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15207 self.expandnames_calls = 0
15209 def ExpandNames(self):
15210 checkargs_calls = getattr(self, "checkargs_calls", 0)
15211 if checkargs_calls < 1:
15212 raise errors.ProgrammerError("CheckArguments was not called")
15214 self.expandnames_calls += 1
15216 if self.op.notify_waitlock:
15217 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15219 self.LogInfo("Expanding names")
15221 # Get lock on master node (just to get a lock, not for a particular reason)
15222 self.needed_locks = {
15223       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15224       }
15226 def Exec(self, feedback_fn):
15227 if self.expandnames_calls < 1:
15228 raise errors.ProgrammerError("ExpandNames was not called")
15230 if self.op.notify_exec:
15231 self._Notify(False, constants.JQT_EXEC, None)
15233 self.LogInfo("Executing")
15235 if self.op.log_messages:
15236 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15237 for idx, msg in enumerate(self.op.log_messages):
15238 self.LogInfo("Sending log message %s", idx + 1)
15239 feedback_fn(constants.JQT_MSGPREFIX + msg)
15240 # Report how many test messages have been sent
15241 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15243     if self.op.fail:
15244       raise errors.OpExecError("Opcode failure was requested")
15246     return True
15249 class LUTestAllocator(NoHooksLU):
15250 """Run allocator tests.
15252   This LU runs the allocator tests.
15254   """
15255 def CheckPrereq(self):
15256 """Check prerequisites.
15258     This checks the opcode parameters depending on the direction and mode of
15259     the test.
15260     """
15261 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15262 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15263 for attr in ["memory", "disks", "disk_template",
15264 "os", "tags", "nics", "vcpus"]:
15265 if not hasattr(self.op, attr):
15266 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15267 attr, errors.ECODE_INVAL)
15268 iname = self.cfg.ExpandInstanceName(self.op.name)
15269 if iname is not None:
15270 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15271 iname, errors.ECODE_EXISTS)
15272 if not isinstance(self.op.nics, list):
15273 raise errors.OpPrereqError("Invalid parameter 'nics'",
15274 errors.ECODE_INVAL)
15275 if not isinstance(self.op.disks, list):
15276 raise errors.OpPrereqError("Invalid parameter 'disks'",
15277 errors.ECODE_INVAL)
15278 for row in self.op.disks:
15279 if (not isinstance(row, dict) or
15280 constants.IDISK_SIZE not in row or
15281 not isinstance(row[constants.IDISK_SIZE], int) or
15282 constants.IDISK_MODE not in row or
15283 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15284 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15285 " parameter", errors.ECODE_INVAL)
15286 if self.op.hypervisor is None:
15287 self.op.hypervisor = self.cfg.GetHypervisorType()
15288 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15289 fname = _ExpandInstanceName(self.cfg, self.op.name)
15290 self.op.name = fname
15291 self.relocate_from = \
15292 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15293 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15294 constants.IALLOCATOR_MODE_NODE_EVAC):
15295 if not self.op.instances:
15296 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15297 self.op.instances = _GetWantedInstances(self, self.op.instances)
15299 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15300 self.op.mode, errors.ECODE_INVAL)
15302 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15303 if self.op.allocator is None:
15304 raise errors.OpPrereqError("Missing allocator name",
15305 errors.ECODE_INVAL)
15306 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15307 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15308 self.op.direction, errors.ECODE_INVAL)
15310 def Exec(self, feedback_fn):
15311 """Run the allocator test.
15314 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15315 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15316 memory=self.op.memory,
15317 disks=self.op.disks,
15318                                           disk_template=self.op.disk_template,
15319                                           os=self.op.os,
15320                                           tags=self.op.tags,
15321                                           nics=self.op.nics,
15322                                           vcpus=self.op.vcpus,
15323 spindle_use=self.op.spindle_use,
15324 hypervisor=self.op.hypervisor)
15325 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15326 req = iallocator.IAReqRelocate(name=self.op.name,
15327 relocate_from=list(self.relocate_from))
15328 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15329 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15330 target_groups=self.op.target_groups)
15331 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15332 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15333 evac_mode=self.op.evac_mode)
15334 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15335 disk_template = self.op.disk_template
15336 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15337 memory=self.op.memory,
15338 disks=self.op.disks,
15339                                              disk_template=disk_template,
15340                                              os=self.op.os,
15341                                              tags=self.op.tags,
15342                                              nics=self.op.nics,
15343                                              vcpus=self.op.vcpus,
15344 spindle_use=self.op.spindle_use,
15345 hypervisor=self.op.hypervisor)
15346 for idx in range(self.op.count)]
15347 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15349 raise errors.ProgrammerError("Uncatched mode %s in"
15350 " LUTestAllocator.Exec", self.op.mode)
15352 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15353 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15354 result = ial.in_text
15355     else:
15356       ial.Run(self.op.allocator, validate=False)
15357       result = ial.out_text
15358     return result
15361 class LUNetworkAdd(LogicalUnit):
15362 """Logical unit for creating networks.
15365 HPATH = "network-add"
15366   HTYPE = constants.HTYPE_NETWORK
15367   REQ_BGL = False
15369 def BuildHooksNodes(self):
15370 """Build hooks nodes.
15373 mn = self.cfg.GetMasterNode()
15374 return ([mn], [mn])
15376 def ExpandNames(self):
15377 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15378 self.needed_locks = {}
15379 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15381 def CheckPrereq(self):
15382 """Check prerequisites.
15384     This checks that the given network name is not yet defined.
15387     """
15388 if self.op.network is None:
15389 raise errors.OpPrereqError("Network must be given",
15390 errors.ECODE_INVAL)
15392 uuid = self.cfg.LookupNetwork(self.op.network_name)
15395 raise errors.OpPrereqError("Network '%s' already defined" %
15396 self.op.network, errors.ECODE_EXISTS)
15399 def BuildHooksEnv(self):
15400 """Build hooks env.
15404 "NETWORK_NAME": self.op.network_name,
15405 "NETWORK_SUBNET": self.op.network,
15406 "NETWORK_GATEWAY": self.op.gateway,
15407 "NETWORK_SUBNET6": self.op.network6,
15408 "NETWORK_GATEWAY6": self.op.gateway6,
15409 "NETWORK_MAC_PREFIX": self.op.mac_prefix,
15410 "NETWORK_TYPE": self.op.network_type,
15414 def Exec(self, feedback_fn):
15415 """Add the ip pool to the cluster.
15418 nobj = objects.Network(name=self.op.network_name,
15419 network=self.op.network,
15420 gateway=self.op.gateway,
15421 network6=self.op.network6,
15422 gateway6=self.op.gateway6,
15423 mac_prefix=self.op.mac_prefix,
15424 network_type=self.op.network_type,
15425                            uuid=self.network_uuid)
15427     # Initialize the associated address pool
15428     try:
15429       pool = network.AddressPool.InitializeNetwork(nobj)
15430 except errors.AddressPoolError, e:
15431 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15433 # Check if we need to reserve the nodes and the cluster master IP
15434 # These may not be allocated to any instances in routed mode, as
15435 # they wouldn't function anyway.
15436 for node in self.cfg.GetAllNodesInfo().values():
15437 for ip in [node.primary_ip, node.secondary_ip]:
15438         try:
15439           pool.Reserve(ip)
15440           self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15442         except errors.AddressPoolError:
15443           pass
15445     master_ip = self.cfg.GetClusterInfo().master_ip
15446     try:
15447       pool.Reserve(master_ip)
15448       self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15449     except errors.AddressPoolError:
15450       pass
15452     if self.op.add_reserved_ips:
15453 for ip in self.op.add_reserved_ips:
15454         try:
15455           pool.Reserve(ip, external=True)
15456 except errors.AddressPoolError, e:
15457 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15459 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15460 del self.remove_locks[locking.LEVEL_NETWORK]
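# Editor's illustrative sketch (added): the AddressPool calls used in
# LUNetworkAdd.Exec can be read as follows; 'net' stands for a hypothetical
# objects.Network describing e.g. 192.0.2.0/28:
#
#   pool = network.AddressPool.InitializeNetwork(net)
#   pool.Reserve("192.0.2.10", external=True)  # externally reserved address
#   pool.GetReservedCount()                    # -> number of reserved slots
#   pool.GetMap()                              # -> textual free/reserved map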
15463 class LUNetworkRemove(LogicalUnit):
15464 HPATH = "network-remove"
15465   HTYPE = constants.HTYPE_NETWORK
15466   REQ_BGL = False
15468 def ExpandNames(self):
15469 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15471 self.needed_locks = {
15472       locking.LEVEL_NETWORK: [self.network_uuid],
15473       }
15476 def CheckPrereq(self):
15477 """Check prerequisites.
15479     This checks that the given network name exists as a network and that it
15480     is not connected to (i.e. used by) any node group.
15483     """
15484 if not self.network_uuid:
15485 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15486 errors.ECODE_INVAL)
15488     # Verify that the network is not connected.
15489 node_groups = [group.name
15490 for group in self.cfg.GetAllNodeGroupsInfo().values()
15491 for network in group.networks.keys()
15492 if network == self.network_uuid]
15495 self.LogWarning("Nework '%s' is connected to the following"
15496 " node groups: %s" % (self.op.network_name,
15497 utils.CommaJoin(utils.NiceSort(node_groups))))
15498 raise errors.OpPrereqError("Network still connected",
15499 errors.ECODE_STATE)
15501 def BuildHooksEnv(self):
15502 """Build hooks env.
15506 "NETWORK_NAME": self.op.network_name,
15509 def BuildHooksNodes(self):
15510 """Build hooks nodes.
15513 mn = self.cfg.GetMasterNode()
15514 return ([mn], [mn])
15516 def Exec(self, feedback_fn):
15517 """Remove the network.
15521 self.cfg.RemoveNetwork(self.network_uuid)
15522 except errors.ConfigurationError:
15523 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15524 (self.op.network_name, self.network_uuid))
15527 class LUNetworkSetParams(LogicalUnit):
15528 """Modifies the parameters of a network.
15531 HPATH = "network-modify"
15532   HTYPE = constants.HTYPE_NETWORK
15533   REQ_BGL = False
15535 def CheckArguments(self):
15536 if (self.op.gateway and
15537 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15538 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15539 " at once", errors.ECODE_INVAL)
15542 def ExpandNames(self):
15543 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15544 self.network = self.cfg.GetNetwork(self.network_uuid)
15545 self.needed_locks = {
15546       locking.LEVEL_NETWORK: [self.network_uuid],
15547       }
15550 if self.network is None:
15551 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15552 (self.op.network_name, self.network_uuid),
15553 errors.ECODE_INVAL)
15555 def CheckPrereq(self):
15556 """Check prerequisites.
15559 self.gateway = self.network.gateway
15560 self.network_type = self.network.network_type
15561 self.mac_prefix = self.network.mac_prefix
15562 self.network6 = self.network.network6
15563 self.gateway6 = self.network.gateway6
15565 self.pool = network.AddressPool(self.network)
15567 if self.op.gateway:
15568 if self.op.gateway == constants.VALUE_NONE:
15569 self.gateway = None
15570       else:
15571         self.gateway = self.op.gateway
15572 if self.pool.IsReserved(self.gateway):
15573 raise errors.OpPrereqError("%s is already reserved" %
15574 self.gateway, errors.ECODE_INVAL)
15576 if self.op.network_type:
15577 if self.op.network_type == constants.VALUE_NONE:
15578 self.network_type = None
15579       else:
15580         self.network_type = self.op.network_type
15582 if self.op.mac_prefix:
15583 if self.op.mac_prefix == constants.VALUE_NONE:
15584 self.mac_prefix = None
15585       else:
15586         self.mac_prefix = self.op.mac_prefix
15588 if self.op.gateway6:
15589 if self.op.gateway6 == constants.VALUE_NONE:
15590 self.gateway6 = None
15591       else:
15592         self.gateway6 = self.op.gateway6
15594 if self.op.network6:
15595 if self.op.network6 == constants.VALUE_NONE:
15596 self.network6 = None
15597       else:
15598         self.network6 = self.op.network6
15602 def BuildHooksEnv(self):
15603 """Build hooks env.
15607 "NETWORK_NAME": self.op.network_name,
15608 "NETWORK_SUBNET": self.network.network,
15609 "NETWORK_GATEWAY": self.gateway,
15610 "NETWORK_SUBNET6": self.network6,
15611 "NETWORK_GATEWAY6": self.gateway6,
15612 "NETWORK_MAC_PREFIX": self.mac_prefix,
15613 "NETWORK_TYPE": self.network_type,
15617 def BuildHooksNodes(self):
15618 """Build hooks nodes.
15621 mn = self.cfg.GetMasterNode()
15622 return ([mn], [mn])
15624 def Exec(self, feedback_fn):
15625 """Modifies the network.
15628 #TODO: reserve/release via temporary reservation manager
15629 # extend cfg.ReserveIp/ReleaseIp with the external flag
15630 if self.op.gateway:
15631 if self.gateway == self.network.gateway:
15632 self.LogWarning("Gateway is already %s" % self.gateway)
15634       else:
15635         self.pool.Reserve(self.gateway, external=True)
15636 if self.network.gateway:
15637 self.pool.Release(self.network.gateway, external=True)
15638 self.network.gateway = self.gateway
15640 if self.op.add_reserved_ips:
15641 for ip in self.op.add_reserved_ips:
15643 if self.pool.IsReserved(ip):
15644 self.LogWarning("IP %s is already reserved" % ip)
15645         try:
15646           self.pool.Reserve(ip, external=True)
15647 except errors.AddressPoolError, e:
15648 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15650 if self.op.remove_reserved_ips:
15651 for ip in self.op.remove_reserved_ips:
15652 if ip == self.network.gateway:
15653 self.LogWarning("Cannot unreserve Gateway's IP")
15656 if not self.pool.IsReserved(ip):
15657 self.LogWarning("IP %s is already unreserved" % ip)
15658         try:
15659           self.pool.Release(ip, external=True)
15660 except errors.AddressPoolError, e:
15661 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15663 if self.op.mac_prefix:
15664 self.network.mac_prefix = self.mac_prefix
15666 if self.op.network6:
15667 self.network.network6 = self.network6
15669 if self.op.gateway6:
15670 self.network.gateway6 = self.gateway6
15672 if self.op.network_type:
15673 self.network.network_type = self.network_type
15675 self.pool.Validate()
15677 self.cfg.Update(self.network, feedback_fn)
15680 class _NetworkQuery(_QueryBase):
15681 FIELDS = query.NETWORK_FIELDS
15683 def ExpandNames(self, lu):
15684 lu.needed_locks = {}
15686 self._all_networks = lu.cfg.GetAllNetworksInfo()
15687 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15689     if not self.names:
15690       self.wanted = [name_to_uuid[name]
15691                      for name in utils.NiceSort(name_to_uuid.keys())]
15692     else:
15693       # Accept names to be either names or UUIDs.
15694       missing = []
15695       self.wanted = []
15696       all_uuid = frozenset(self._all_networks.keys())
15698 for name in self.names:
15699 if name in all_uuid:
15700 self.wanted.append(name)
15701 elif name in name_to_uuid:
15702 self.wanted.append(name_to_uuid[name])
15703         else:
15704           missing.append(name)
15706       if missing:
15707 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15708 errors.ECODE_NOENT)
15710   def DeclareLocks(self, lu, level):
15711     pass
15713 def _GetQueryData(self, lu):
15714 """Computes the list of networks and their attributes.
15717 do_instances = query.NETQ_INST in self.requested_data
15718 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15719 do_stats = query.NETQ_STATS in self.requested_data
15720 cluster = lu.cfg.GetClusterInfo()
15722     network_to_groups = None
15723     network_to_instances = None
15724     stats = None
15725     if do_groups:
15726       # For NETQ_GROUP, we need to map network->[groups]
15728       all_groups = lu.cfg.GetAllNodeGroupsInfo()
15729       network_to_groups = dict((uuid, []) for uuid in self.wanted)
15730       default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
15732       if do_instances:
15733         all_instances = lu.cfg.GetAllInstancesInfo()
15734         all_nodes = lu.cfg.GetAllNodesInfo()
15735         network_to_instances = dict((uuid, []) for uuid in self.wanted)
15738       for group in all_groups.values():
15739         if do_instances:
15740           group_nodes = [node.name for node in all_nodes.values() if
15741                          node.group == group.uuid]
15742           group_instances = [instance for instance in all_instances.values()
15743                              if instance.primary_node in group_nodes]
15745         for net_uuid in group.networks.keys():
15746           if net_uuid in network_to_groups:
15747             netparams = group.networks[net_uuid]
15748             mode = netparams[constants.NIC_MODE]
15749             link = netparams[constants.NIC_LINK]
15750             info = group.name + '(' + mode + ', ' + link + ')'
15751             network_to_groups[net_uuid].append(info)
15753             if do_instances:
15754               for instance in group_instances:
15755                 for nic in instance.nics:
15756                   if nic.network == self._all_networks[net_uuid].name:
15757                     network_to_instances[net_uuid].append(instance.name)
15759     if do_stats:
15760       stats = {}
15762       for uuid, net in self._all_networks.items():
15763         if uuid in self.wanted:
15764           pool = network.AddressPool(net)
15765           stats[uuid] = {
15766             "free_count": pool.GetFreeCount(),
15767             "reserved_count": pool.GetReservedCount(),
15768             "map": pool.GetMap(),
15769             "external_reservations": ", ".join(pool.GetExternalReservations()),
15770             }
15772     return query.NetworkQueryData([self._all_networks[uuid]
15773                                    for uuid in self.wanted],
15774                                   network_to_groups,
15775                                   network_to_instances,
15776                                   stats)
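# Editor's illustrative note (added): with NETQ_STATS requested, each wanted
# network contributes a stats entry shaped roughly like this (values invented):
#
#   {"free_count": 12,
#    "reserved_count": 4,
#    "map": "XX..........X..X",
#    "external_reservations": "192.0.2.1, 192.0.2.15"}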
15779 class LUNetworkQuery(NoHooksLU):
15780 """Logical unit for querying networks.
15785 def CheckArguments(self):
15786 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
15787 self.op.output_fields, False)
15789 def ExpandNames(self):
15790 self.nq.ExpandNames(self)
15792 def Exec(self, feedback_fn):
15793 return self.nq.OldStyleQuery(self)
15797 class LUNetworkConnect(LogicalUnit):
15798 """Connect a network to a nodegroup
15801 HPATH = "network-connect"
15802   HTYPE = constants.HTYPE_NETWORK
15803   REQ_BGL = False
15805 def ExpandNames(self):
15806 self.network_name = self.op.network_name
15807 self.group_name = self.op.group_name
15808 self.network_mode = self.op.network_mode
15809 self.network_link = self.op.network_link
15811 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
15812 self.network = self.cfg.GetNetwork(self.network_uuid)
15813 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
15814 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15816 self.needed_locks = {
15817 locking.LEVEL_INSTANCE: [],
15818       locking.LEVEL_NODEGROUP: [self.group_uuid],
15819       }
15820 self.share_locks[locking.LEVEL_INSTANCE] = 1
15822 def DeclareLocks(self, level):
15823 if level == locking.LEVEL_INSTANCE:
15824 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15826       # Lock instances optimistically, needs verification once group lock has
15827       # been acquired
15828 self.needed_locks[locking.LEVEL_INSTANCE] = \
15829 self.cfg.GetNodeGroupInstances(self.group_uuid)
15831   def BuildHooksEnv(self):
15832     ret = dict()
15833     ret["GROUP_NAME"] = self.group_name
15834 ret["GROUP_NETWORK_NAME"] = self.network_name
15835 ret["GROUP_NETWORK_MODE"] = self.network_mode
15836 ret["GROUP_NETWORK_LINK"] = self.network_link
15839 def BuildHooksNodes(self):
15840 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
15841 return (nodes, nodes)
15844 def CheckPrereq(self):
15845     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
15846                                 for i in value)
15848 if self.network is None:
15849 raise errors.OpPrereqError("Network %s does not exist" %
15850 self.network_name, errors.ECODE_INVAL)
15852 self.netparams = dict()
15853 self.netparams[constants.NIC_MODE] = self.network_mode
15854 self.netparams[constants.NIC_LINK] = self.network_link
15855 objects.NIC.CheckParameterSyntax(self.netparams)
15857 #if self.network_mode == constants.NIC_MODE_BRIDGED:
15858 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
15859 self.connected = False
15860 if self.network_uuid in self.group.networks:
15861 self.LogWarning("Network '%s' is already mapped to group '%s'" %
15862 (self.network_name, self.group.name))
15863       self.connected = True
15864       return
15866 pool = network.AddressPool(self.network)
15867 if self.op.conflicts_check:
15868 groupinstances = []
15869 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
15870 groupinstances.append(self.cfg.GetInstanceInfo(n))
15871 instances = [(instance.name, idx, nic.ip)
15872 for instance in groupinstances
15873 for idx, nic in enumerate(instance.nics)
15874 if (not nic.network and pool._Contains(nic.ip))]
15876 self.LogWarning("Following occurences use IPs from network %s"
15877 " that is about to connect to nodegroup %s: %s" %
15878                         (self.network_name, self.group.name,
15879                          l(instances)))
15880 raise errors.OpPrereqError("Conflicting IPs found."
15881 " Please remove/modify"
15882 " corresponding NICs",
15883 errors.ECODE_INVAL)
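  # Editor's illustrative note (added): the conflict warning above formats each
  # offending NIC via the lambda defined at the top of CheckPrereq as
  # "name: index/ip", e.g.:
  #
  #   "inst1.example.com: 0/192.0.2.10, inst2.example.com: 1/192.0.2.11"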
15885   def Exec(self, feedback_fn):
15886     if self.connected:
15887       return
15889 self.group.networks[self.network_uuid] = self.netparams
15890 self.cfg.Update(self.group, feedback_fn)
15893 class LUNetworkDisconnect(LogicalUnit):
15894 """Disconnect a network to a nodegroup
15897 HPATH = "network-disconnect"
15898   HTYPE = constants.HTYPE_NETWORK
15899   REQ_BGL = False
15901 def ExpandNames(self):
15902 self.network_name = self.op.network_name
15903 self.group_name = self.op.group_name
15905 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
15906 self.network = self.cfg.GetNetwork(self.network_uuid)
15907 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
15908 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15910 self.needed_locks = {
15911 locking.LEVEL_INSTANCE: [],
15912       locking.LEVEL_NODEGROUP: [self.group_uuid],
15913       }
15914 self.share_locks[locking.LEVEL_INSTANCE] = 1
15916 def DeclareLocks(self, level):
15917 if level == locking.LEVEL_INSTANCE:
15918 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15920       # Lock instances optimistically, needs verification once group lock has
15921       # been acquired
15922 self.needed_locks[locking.LEVEL_INSTANCE] = \
15923 self.cfg.GetNodeGroupInstances(self.group_uuid)
15925   def BuildHooksEnv(self):
15926     ret = dict()
15927     ret["GROUP_NAME"] = self.group_name
15928 ret["GROUP_NETWORK_NAME"] = self.network_name
15931 def BuildHooksNodes(self):
15932 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
15933 return (nodes, nodes)
15936 def CheckPrereq(self):
15937     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
15938                                 for i in value)
15940 self.connected = True
15941 if self.network_uuid not in self.group.networks:
15942 self.LogWarning("Network '%s' is"
15943 " not mapped to group '%s'" %
15944 (self.network_name, self.group.name))
15945       self.connected = False
15946       return
15948 if self.op.conflicts_check:
15949 groupinstances = []
15950 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
15951 groupinstances.append(self.cfg.GetInstanceInfo(n))
15952 instances = [(instance.name, idx, nic.ip)
15953 for instance in groupinstances
15954 for idx, nic in enumerate(instance.nics)
15955 if nic.network == self.network_name]
15957 self.LogWarning("Following occurences use IPs from network %s"
15958 " that is about to disconnected from the nodegroup"
15960 (self.network_name, self.group.name,
15962 raise errors.OpPrereqError("Conflicting IPs."
15963 " Please remove/modify"
15964 " corresponding NICS",
15965 errors.ECODE_INVAL)
15967 def Exec(self, feedback_fn):
15968     if not self.connected:
15969       return
15971 del self.group.networks[self.network_uuid]
15972 self.cfg.Update(self.group, feedback_fn)
15975 #: Query type implementations
15976 _QUERY_IMPL = {
15977   constants.QR_CLUSTER: _ClusterQuery,
15978 constants.QR_INSTANCE: _InstanceQuery,
15979 constants.QR_NODE: _NodeQuery,
15980 constants.QR_GROUP: _GroupQuery,
15981 constants.QR_NETWORK: _NetworkQuery,
15982 constants.QR_OS: _OsQuery,
15983   constants.QR_EXPORT: _ExportQuery,
15984   }
15986 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15989 def _GetQueryImplementation(name):
15990 """Returns the implemtnation for a query type.
15992 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15996 return _QUERY_IMPL[name]
15998 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15999 errors.ECODE_INVAL)
16001 def _CheckForConflictingIp(lu, ip, node):
16002 """In case of conflicting ip raise error.
16005 @param ip: ip address
16007 @param node: node name
16010 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16011 if conf_net is not None:
16012 raise errors.OpPrereqError("Conflicting IP found:"
16013 " %s <> %s." % (ip, conf_net),
16014 errors.ECODE_INVAL)
16016 return (None, None)