4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the
  opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)
185 The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
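    For example, an LU that wants all node locks in shared mode could do
    (a sketch)::

      self.share_locks[locking.LEVEL_NODE] = 1
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
      }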
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
    """Declare LU locking needs for a level.
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this
      function will not be called.
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes to return,
      use an empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this
      function will not be called.
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been
    done before.
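    A typical use from an instance LU's ExpandNames could look like this
    (a sketch; the follow-up node locking depends on the particular LU)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        self.needed_locks[locking.LEVEL_NODE] = []
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE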
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
    """Check prerequisites for this tasklet.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
  @param nodename: The node name
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
800 @return: the new parameter dictionary
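  Example (a sketch)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # => {"a": 1, "c": 3}  ("b" is reset/removed, "c" is added)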
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object.
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object.
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
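  For example, to release all node locks except those of an instance's
  nodes (a sketch; C{instance} stands for an already looked-up instance
  object)::

    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=list(instance.all_nodes))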
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckGlobalHvParams(params):
1024 """Validates that given hypervisor params are not global ones.
1026 This will ensure that instances don't get customised versions of
1030 used_globals = constants.HVC_GLOBALS.intersection(params)
1032 msg = ("The following hypervisor parameters are global and cannot"
1033 " be customized at instance level, please modify them at"
1034 " cluster level: %s" % utils.CommaJoin(used_globals))
1035 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1038 def _CheckNodeOnline(lu, node, msg=None):
1039 """Ensure that a given node is online.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @param msg: if passed, should be a message to replace the default one
1044 @raise errors.OpPrereqError: if the node is offline
1048 msg = "Can't use offline node"
1049 if lu.cfg.GetNodeInfo(node).offline:
1050 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1053 def _CheckNodeNotDrained(lu, node):
1054 """Ensure that a given node is not drained.
1056 @param lu: the LU on behalf of which we make the check
1057 @param node: the node to check
1058 @raise errors.OpPrereqError: if the node is drained
1061 if lu.cfg.GetNodeInfo(node).drained:
1062 raise errors.OpPrereqError("Can't use drained node %s" % node,
1066 def _CheckNodeVmCapable(lu, node):
1067 """Ensure that a given node is vm capable.
1069 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @raise errors.OpPrereqError: if the node is not vm capable
1074 if not lu.cfg.GetNodeInfo(node).vm_capable:
1075 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1079 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1080 """Ensure that a node supports a given OS.
1082 @param lu: the LU on behalf of which we make the check
1083 @param node: the node to check
1084 @param os_name: the OS to query about
1085 @param force_variant: whether to ignore variant errors
1086 @raise errors.OpPrereqError: if the node is not supporting the OS
1089 result = lu.rpc.call_os_get(node, os_name)
1090 result.Raise("OS '%s' not in supported OS list for node %s" %
1092 prereq=True, ecode=errors.ECODE_INVAL)
1093 if not force_variant:
1094 _CheckOSVariant(result.payload, os_name)
1097 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1098 """Ensure that a node has the given secondary ip.
1100 @type lu: L{LogicalUnit}
1101 @param lu: the LU on behalf of which we make the check
1103 @param node: the node to check
1104 @type secondary_ip: string
1105 @param secondary_ip: the ip to check
1106 @type prereq: boolean
1107 @param prereq: whether to throw a prerequisite or an execute error
1108 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1109 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1112 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1113 result.Raise("Failure checking secondary ip on node %s" % node,
1114 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115 if not result.payload:
1116 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1117 " please fix and re-run this command" % secondary_ip)
1119 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1121 raise errors.OpExecError(msg)
1124 def _GetClusterDomainSecret():
1125 """Reads the cluster domain secret.
1128 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1132 def _CheckInstanceState(lu, instance, req_states, msg=None):
1133 """Ensure that an instance is in one of the required states.
1135 @param lu: the LU on behalf of which we make the check
1136 @param instance: the instance to check
1137 @param msg: if passed, should be a message to replace the default one
1138 @raise errors.OpPrereqError: if the instance is not in the required state
1142 msg = ("can't use instance from outside %s states" %
1143 utils.CommaJoin(req_states))
1144 if instance.admin_state not in req_states:
1145 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1146 (instance.name, instance.admin_state, msg),
1149 if constants.ADMINST_UP not in req_states:
1150 pnode = instance.primary_node
1151 if not lu.cfg.GetNodeInfo(pnode).offline:
1152 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1153 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1154 prereq=True, ecode=errors.ECODE_ENVIRON)
1155 if instance.name in ins_l.payload:
1156 raise errors.OpPrereqError("Instance %s is running, %s" %
1157 (instance.name, msg), errors.ECODE_STATE)
1159 lu.LogWarning("Primary node offline, ignoring check that instance"
1163 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1164 """Computes if value is in the desired range.
1166 @param name: name of the parameter for which we perform the check
1167 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1169 @param ipolicy: dictionary containing min, max and std values
1170 @param value: actual value that we want to use
1171 @return: None or element not meeting the criteria
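  For example (a sketch): with an ipolicy whose memory min/max are 512 and
  4096, a value of 8192 for C{constants.ISPEC_MEM_SIZE} returns an error
  string, while 1024 returns C{None}.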
1175 if value in [None, constants.VALUE_AUTO]:
1177 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1178 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1179 if value > max_v or min_v > value:
1181 fqn = "%s/%s" % (name, qualifier)
1184 return ("%s value %s is not in range [%s, %s]" %
1185 (fqn, value, min_v, max_v))
1189 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1190 nic_count, disk_sizes, spindle_use,
1191 _compute_fn=_ComputeMinMaxSpec):
1192 """Verifies ipolicy against provided specs.
1195 @param ipolicy: The ipolicy
1197 @param mem_size: The memory size
1198 @type cpu_count: int
1199 @param cpu_count: Used cpu cores
1200 @type disk_count: int
1201 @param disk_count: Number of disks used
1202 @type nic_count: int
1203 @param nic_count: Number of nics used
1204 @type disk_sizes: list of ints
1205 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1206 @type spindle_use: int
1207 @param spindle_use: The number of spindles this instance uses
1208 @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
1212 assert disk_count == len(disk_sizes)
1215 (constants.ISPEC_MEM_SIZE, "", mem_size),
1216 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1217 (constants.ISPEC_DISK_COUNT, "", disk_count),
1218 (constants.ISPEC_NIC_COUNT, "", nic_count),
1219 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1220 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1221 for idx, d in enumerate(disk_sizes)]
1224 (_compute_fn(name, qualifier, ipolicy, value)
1225 for (name, qualifier, value) in test_settings))
1228 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1229 _compute_fn=_ComputeIPolicySpecViolation):
1230 """Compute if instance meets the specs of ipolicy.
1233 @param ipolicy: The ipolicy to verify against
1234 @type instance: L{objects.Instance}
1235 @param instance: The instance to verify
1236 @param _compute_fn: The function to verify ipolicy (unittest only)
1237 @see: L{_ComputeIPolicySpecViolation}
1240 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1241 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1242 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1243 disk_count = len(instance.disks)
1244 disk_sizes = [disk.size for disk in instance.disks]
1245 nic_count = len(instance.nics)
1247 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1248 disk_sizes, spindle_use)
1251 def _ComputeIPolicyInstanceSpecViolation(
1252 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1253 """Compute if instance specs meets the specs of ipolicy.
1256 @param ipolicy: The ipolicy to verify against
1257 @param instance_spec: dict
1258 @param instance_spec: The instance spec to verify
1259 @param _compute_fn: The function to verify ipolicy (unittest only)
1260 @see: L{_ComputeIPolicySpecViolation}
1263 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1264 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1265 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1266 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1267 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1268 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1270 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1271 disk_sizes, spindle_use)
1274 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1276 _compute_fn=_ComputeIPolicyInstanceViolation):
1277 """Compute if instance meets the specs of the new target group.
1279 @param ipolicy: The ipolicy to verify
1280 @param instance: The instance object to verify
1281 @param current_group: The current group of the instance
1282 @param target_group: The new group of the instance
1283 @param _compute_fn: The function to verify ipolicy (unittest only)
1284 @see: L{_ComputeIPolicySpecViolation}
1287 if current_group == target_group:
1290 return _compute_fn(ipolicy, instance)
1293 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1294 _compute_fn=_ComputeIPolicyNodeViolation):
1295 """Checks that the target node is correct in terms of instance policy.
1297 @param ipolicy: The ipolicy to verify
1298 @param instance: The instance object to verify
1299 @param node: The new node to relocate
1300 @param ignore: Ignore violations of the ipolicy
1301 @param _compute_fn: The function to verify ipolicy (unittest only)
1302 @see: L{_ComputeIPolicySpecViolation}
1305 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1306 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1309 msg = ("Instance does not meet target node group's (%s) instance"
1310 " policy: %s") % (node.group, utils.CommaJoin(res))
1314 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1317 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1318 """Computes a set of any instances that would violate the new ipolicy.
1320 @param old_ipolicy: The current (still in-place) ipolicy
1321 @param new_ipolicy: The new (to become) ipolicy
1322 @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but did
      not violate the old one
1327 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1328 _ComputeViolatingInstances(old_ipolicy, instances))
1331 def _ExpandItemName(fn, name, kind):
1332 """Expand an item name.
1334 @param fn: the function to use for expansion
1335 @param name: requested item name
1336 @param kind: text description ('Node' or 'Instance')
1337 @return: the resolved (full) name
1338 @raise errors.OpPrereqError: if the item is not found
1341 full_name = fn(name)
1342 if full_name is None:
1343 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1348 def _ExpandNodeName(cfg, name):
1349 """Wrapper over L{_ExpandItemName} for nodes."""
1350 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1353 def _ExpandInstanceName(cfg, name):
1354 """Wrapper over L{_ExpandItemName} for instance."""
1355 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1358 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1359 network_type, mac_prefix, tags):
  """Builds network-related env variables for hooks.
1362 This builds the hook environment from individual variables.
1365 @param name: the name of the network
1366 @type subnet: string
1367 @param subnet: the ipv4 subnet
1368 @type gateway: string
1369 @param gateway: the ipv4 gateway
1370 @type network6: string
1371 @param network6: the ipv6 subnet
1372 @type gateway6: string
1373 @param gateway6: the ipv6 gateway
1374 @type network_type: string
1375 @param network_type: the type of the network
1376 @type mac_prefix: string
1377 @param mac_prefix: the mac_prefix
1379 @param tags: the tags of the network
1384 env["NETWORK_NAME"] = name
1386 env["NETWORK_SUBNET"] = subnet
1388 env["NETWORK_GATEWAY"] = gateway
1390 env["NETWORK_SUBNET6"] = network6
1392 env["NETWORK_GATEWAY6"] = gateway6
1394 env["NETWORK_MAC_PREFIX"] = mac_prefix
1396 env["NETWORK_TYPE"] = network_type
1398 env["NETWORK_TAGS"] = " ".join(tags)
1403 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1404 minmem, maxmem, vcpus, nics, disk_template, disks,
1405 bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.
1408 This builds the hook environment from individual variables.
1411 @param name: the name of the instance
1412 @type primary_node: string
1413 @param primary_node: the name of the instance's primary node
1414 @type secondary_nodes: list
1415 @param secondary_nodes: list of secondary nodes as strings
1416 @type os_type: string
1417 @param os_type: the name of the instance's OS
1418 @type status: string
1419 @param status: the desired status of the instance
1420 @type minmem: string
1421 @param minmem: the minimum memory size of the instance
1422 @type maxmem: string
1423 @param maxmem: the maximum memory size of the instance
1425 @param vcpus: the count of VCPUs the instance has
1427 @param nics: list of tuples (ip, mac, mode, link, network) representing
1428 the NICs the instance has
1429 @type disk_template: string
1430 @param disk_template: the disk template of the instance
1432 @param disks: the list of (size, mode) pairs
1434 @param bep: the backend parameters for the instance
1436 @param hvp: the hypervisor parameters for the instance
1437 @type hypervisor_name: string
1438 @param hypervisor_name: the hypervisor for the instance
1440 @param tags: list of instance tags as strings
1442 @return: the hook environment for this instance
1447 "INSTANCE_NAME": name,
1448 "INSTANCE_PRIMARY": primary_node,
1449 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1450 "INSTANCE_OS_TYPE": os_type,
1451 "INSTANCE_STATUS": status,
1452 "INSTANCE_MINMEM": minmem,
1453 "INSTANCE_MAXMEM": maxmem,
1454 # TODO(2.7) remove deprecated "memory" value
1455 "INSTANCE_MEMORY": maxmem,
1456 "INSTANCE_VCPUS": vcpus,
1457 "INSTANCE_DISK_TEMPLATE": disk_template,
1458 "INSTANCE_HYPERVISOR": hypervisor_name,
1461 nic_count = len(nics)
1462 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1465 env["INSTANCE_NIC%d_IP" % idx] = ip
1466 env["INSTANCE_NIC%d_MAC" % idx] = mac
1467 env["INSTANCE_NIC%d_MODE" % idx] = mode
1468 env["INSTANCE_NIC%d_LINK" % idx] = link
1470 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1472 nobj = objects.Network.FromDict(netinfo)
1474 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1476 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1478 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1480 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1482 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1483 if nobj.network_type:
1484 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1486 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1487 if mode == constants.NIC_MODE_BRIDGED:
1488 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1492 env["INSTANCE_NIC_COUNT"] = nic_count
1495 disk_count = len(disks)
1496 for idx, (size, mode) in enumerate(disks):
1497 env["INSTANCE_DISK%d_SIZE" % idx] = size
1498 env["INSTANCE_DISK%d_MODE" % idx] = mode
1502 env["INSTANCE_DISK_COUNT"] = disk_count
1507 env["INSTANCE_TAGS"] = " ".join(tags)
1509 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1510 for key, value in source.items():
1511 env["INSTANCE_%s_%s" % (kind, key)] = value
1516 def _NICToTuple(lu, nic):
  """Build a tuple of nic information.
1519 @type lu: L{LogicalUnit}
1520 @param lu: the logical unit on whose behalf we execute
1521 @type nic: L{objects.NIC}
1522 @param nic: nic to convert to hooks tuple
1527 cluster = lu.cfg.GetClusterInfo()
1528 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1529 mode = filled_params[constants.NIC_MODE]
1530 link = filled_params[constants.NIC_LINK]
1534 net_uuid = lu.cfg.LookupNetwork(net)
1536 nobj = lu.cfg.GetNetwork(net_uuid)
1537 netinfo = objects.Network.ToDict(nobj)
1538 return (ip, mac, mode, link, net, netinfo)
1541 def _NICListToTuple(lu, nics):
1542 """Build a list of nic information tuples.
1544 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1545 value in LUInstanceQueryData.
1547 @type lu: L{LogicalUnit}
1548 @param lu: the logical unit on whose behalf we execute
1549 @type nics: list of L{objects.NIC}
1550 @param nics: list of nics to convert to hooks tuples
1555 hooks_nics.append(_NICToTuple(lu, nic))
1559 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1560 """Builds instance related env variables for hooks from an object.
1562 @type lu: L{LogicalUnit}
1563 @param lu: the logical unit on whose behalf we execute
1564 @type instance: L{objects.Instance}
1565 @param instance: the instance for which we should build the
1567 @type override: dict
1568 @param override: dictionary with key/values that will override
1571 @return: the hook environment dictionary
1574 cluster = lu.cfg.GetClusterInfo()
1575 bep = cluster.FillBE(instance)
1576 hvp = cluster.FillHV(instance)
1578 "name": instance.name,
1579 "primary_node": instance.primary_node,
1580 "secondary_nodes": instance.secondary_nodes,
1581 "os_type": instance.os,
1582 "status": instance.admin_state,
1583 "maxmem": bep[constants.BE_MAXMEM],
1584 "minmem": bep[constants.BE_MINMEM],
1585 "vcpus": bep[constants.BE_VCPUS],
1586 "nics": _NICListToTuple(lu, instance.nics),
1587 "disk_template": instance.disk_template,
1588 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1591 "hypervisor_name": instance.hypervisor,
1592 "tags": instance.tags,
1595 args.update(override)
1596 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1599 def _AdjustCandidatePool(lu, exceptions):
1600 """Adjust the candidate pool after node operations.
1603 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1605 lu.LogInfo("Promoted nodes to master candidate role: %s",
1606 utils.CommaJoin(node.name for node in mod_list))
1607 for name in mod_list:
1608 lu.context.ReaddNode(name)
1609 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1611 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1615 def _DecideSelfPromotion(lu, exceptions=None):
1616 """Decide whether I should promote myself as a master candidate.
1619 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1620 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
1622 mc_should = min(mc_should + 1, cp_size)
1623 return mc_now < mc_should
1626 def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.
1629 @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
1631 @param instances: List of instances to verify
1632 @return: A frozenset of instance names violating the ipolicy
1635 return frozenset([inst.name for inst in instances
1636 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1639 def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
1643 cluster = lu.cfg.GetClusterInfo()
1644 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1645 brlist = [params[constants.NIC_LINK] for params in paramslist
1646 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1648 result = lu.rpc.call_bridges_exist(target_node, brlist)
1649 result.Raise("Error checking bridges on destination node '%s'" %
1650 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1653 def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
1658 node = instance.primary_node
1659 _CheckNicsBridgesExist(lu, instance.nics, node)
1662 def _CheckOSVariant(os_obj, name):
1663 """Check whether an OS name conforms to the os variants specification.
1665 @type os_obj: L{objects.OS}
1666 @param os_obj: OS object to check
1668 @param name: OS name passed by the user, to check for validity
1671 variant = objects.OS.GetVariant(name)
1672 if not os_obj.supported_variants:
1674 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1675 " passed)" % (os_obj.name, variant),
1679 raise errors.OpPrereqError("OS name must include a variant",
1682 if variant not in os_obj.supported_variants:
1683 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1686 def _GetNodeInstancesInner(cfg, fn):
1687 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1690 def _GetNodeInstances(cfg, node_name):
1691 """Returns a list of all primary and secondary instances on a node.
1695 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1698 def _GetNodePrimaryInstances(cfg, node_name):
1699 """Returns primary instances on a node.
1702 return _GetNodeInstancesInner(cfg,
1703 lambda inst: node_name == inst.primary_node)
1706 def _GetNodeSecondaryInstances(cfg, node_name):
1707 """Returns secondary instances on a node.
1710 return _GetNodeInstancesInner(cfg,
1711 lambda inst: node_name in inst.secondary_nodes)
1714 def _GetStorageTypeArgs(cfg, storage_type):
1715 """Returns the arguments for a storage type.
1718 # Special case for file storage
1719 if storage_type == constants.ST_FILE:
1720 # storage.FileStorage wants a list of storage directories
1721 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1726 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1729 for dev in instance.disks:
1730 cfg.SetDiskID(dev, node_name)
1732 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1734 result.Raise("Failed to get disk status from node %s" % node_name,
1735 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1737 for idx, bdev_status in enumerate(result.payload):
1738 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1744 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1745 """Check the sanity of iallocator and node arguments and use the
1746 cluster-wide iallocator if appropriate.
1748 Check that at most one of (iallocator, node) is specified. If none is
1749 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.
1753 @type iallocator_slot: string
1754 @param iallocator_slot: the name of the opcode iallocator slot
1755 @type node_slot: string
1756 @param node_slot: the name of the opcode target node slot
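  A typical call from an LU's CheckArguments or ExpandNames could be
  (a sketch; the actual slot names depend on the opcode)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")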
1759 node = getattr(lu.op, node_slot, None)
1760 ialloc = getattr(lu.op, iallocator_slot, None)
1764 if node is not None and ialloc is not None:
1765 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1767 elif ((node is None and ialloc is None) or
1768 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1769 default_iallocator = lu.cfg.GetDefaultIAllocator()
1770 if default_iallocator:
1771 setattr(lu.op, iallocator_slot, default_iallocator)
1773 raise errors.OpPrereqError("No iallocator or node given and no"
1774 " cluster-wide default iallocator found;"
1775 " please specify either an iallocator or a"
1776 " node, or set a cluster-wide default"
1777 " iallocator", errors.ECODE_INVAL)
1780 def _GetDefaultIAllocator(cfg, ialloc):
1781 """Decides on which iallocator to use.
1783 @type cfg: L{config.ConfigWriter}
1784 @param cfg: Cluster configuration object
1785 @type ialloc: string or None
1786 @param ialloc: Iallocator specified in opcode
1788 @return: Iallocator name
1792 # Use default iallocator
1793 ialloc = cfg.GetDefaultIAllocator()
1796 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1797 " opcode nor as a cluster-wide default",
1803 def _CheckHostnameSane(lu, name):
1804 """Ensures that a given hostname resolves to a 'sane' name.
1806 The given name is required to be a prefix of the resolved hostname,
1807 to prevent accidental mismatches.
1809 @param lu: the logical unit on behalf of which we're checking
1810 @param name: the name we should resolve and check
1811 @return: the resolved hostname object
1814 hostname = netutils.GetHostname(name=name)
1815 if hostname.name != name:
1816 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1817 if not utils.MatchNameComponent(name, [hostname.name]):
1818 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1819 " same as given hostname '%s'") %
1820 (hostname.name, name), errors.ECODE_INVAL)
1824 class LUClusterPostInit(LogicalUnit):
1825 """Logical unit for running hooks after cluster initialization.
1828 HPATH = "cluster-init"
1829 HTYPE = constants.HTYPE_CLUSTER
1831 def BuildHooksEnv(self):
1836 "OP_TARGET": self.cfg.GetClusterName(),
1839 def BuildHooksNodes(self):
1840 """Build hooks nodes.
1843 return ([], [self.cfg.GetMasterNode()])
1845 def Exec(self, feedback_fn):
1852 class LUClusterDestroy(LogicalUnit):
1853 """Logical unit for destroying the cluster.
1856 HPATH = "cluster-destroy"
1857 HTYPE = constants.HTYPE_CLUSTER
1859 def BuildHooksEnv(self):
1864 "OP_TARGET": self.cfg.GetClusterName(),
1867 def BuildHooksNodes(self):
1868 """Build hooks nodes.
1873 def CheckPrereq(self):
1874 """Check prerequisites.
1876 This checks whether the cluster is empty.
1878 Any errors are signaled by raising errors.OpPrereqError.
1881 master = self.cfg.GetMasterNode()
1883 nodelist = self.cfg.GetNodeList()
1884 if len(nodelist) != 1 or nodelist[0] != master:
1885 raise errors.OpPrereqError("There are still %d node(s) in"
1886 " this cluster." % (len(nodelist) - 1),
1888 instancelist = self.cfg.GetInstanceList()
1890 raise errors.OpPrereqError("There are still %d instance(s) in"
1891 " this cluster." % len(instancelist),
1894 def Exec(self, feedback_fn):
1895 """Destroys the cluster.
1898 master_params = self.cfg.GetMasterNetworkParameters()
1900 # Run post hooks on master node before it's removed
1901 _RunPostHook(self, master_params.name)
1903 ems = self.cfg.GetUseExternalMipScript()
1904 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1907 self.LogWarning("Error disabling the master IP address: %s",
1910 return master_params.name
1913 def _VerifyCertificate(filename):
1914 """Verifies a certificate for L{LUClusterVerifyConfig}.
1916 @type filename: string
1917 @param filename: Path to PEM file
1921 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1922 utils.ReadFile(filename))
1923 except Exception, err: # pylint: disable=W0703
1924 return (LUClusterVerifyConfig.ETYPE_ERROR,
1925 "Failed to load X509 certificate %s: %s" % (filename, err))
1928 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1929 constants.SSL_CERT_EXPIRATION_ERROR)
1932 fnamemsg = "While verifying %s: %s" % (filename, msg)
1937 return (None, fnamemsg)
1938 elif errcode == utils.CERT_WARNING:
1939 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1940 elif errcode == utils.CERT_ERROR:
1941 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1943 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1946 def _GetAllHypervisorParameters(cluster, instances):
1947 """Compute the set of all hypervisor parameters.
1949 @type cluster: L{objects.Cluster}
1950 @param cluster: the cluster object
1951 @param instances: list of L{objects.Instance}
1952 @param instances: additional instances from which to obtain parameters
1953 @rtype: list of (origin, hypervisor, parameters)
1954 @return: a list with all parameters found, indicating the hypervisor they
1955 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1960 for hv_name in cluster.enabled_hypervisors:
1961 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1963 for os_name, os_hvp in cluster.os_hvp.items():
1964 for hv_name, hv_params in os_hvp.items():
1966 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1967 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1969 # TODO: collapse identical parameter values in a single one
1970 for instance in instances:
1971 if instance.hvparams:
1972 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1973 cluster.FillHV(instance)))
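# For illustration only: the resulting list mixes cluster defaults, per-OS
# overrides and per-instance overrides. Hypervisor names and parameter
# values below are hypothetical.
#
#   hvp_data = [
#     ("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU", ...}),
#     ("os debian-installer", "xen-pvm", {"kernel_path": "/boot/other", ...}),
#     ("instance inst1.example.com", "kvm", {"acpi": True, ...}),
#   ]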
1978 class _VerifyErrors(object):
1979 """Mix-in for cluster/group verify LUs.
1981 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1982 self.op and self._feedback_fn to be available.)
1986 ETYPE_FIELD = "code"
1987 ETYPE_ERROR = "ERROR"
1988 ETYPE_WARNING = "WARNING"
1990 def _Error(self, ecode, item, msg, *args, **kwargs):
1991 """Format an error message.
1993 Based on the opcode's error_codes parameter, either format a
1994 parseable error code, or a simpler error string.
1996 This must be called only from Exec and functions called from Exec.
1999 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2000 itype, etxt, _ = ecode
2001 # first complete the msg
2004 # then format the whole message
2005 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2006 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2012 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2013 # and finally report it via the feedback_fn
2014 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
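# For illustration only (hypothetical node name, abbreviated message): with
# error_codes enabled the message is machine-parseable, otherwise it is meant
# for humans, e.g. for ecode == constants.CV_ENODEVERSION:
#
#   "ERROR:ENODEVERSION:node:node1.example.com:incompatible protocol versions"
#   "ERROR: node node1.example.com: incompatible protocol versions"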
2016 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2017 """Log an error message if the passed condition is True.
2021 or self.op.debug_simulate_errors) # pylint: disable=E1101
2023 # If the error code is in the list of ignored errors, demote the error to a
2025 (_, etxt, _) = ecode
2026 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2027 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2030 self._Error(ecode, *args, **kwargs)
2032 # do not mark the operation as failed for WARN cases only
2033 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2034 self.bad = self.bad or cond
2037 class LUClusterVerify(NoHooksLU):
2038 """Submits all jobs necessary to verify the cluster.
2043 def ExpandNames(self):
2044 self.needed_locks = {}
2046 def Exec(self, feedback_fn):
2049 if self.op.group_name:
2050 groups = [self.op.group_name]
2051 depends_fn = lambda: None
2053 groups = self.cfg.GetNodeGroupList()
2055 # Verify global configuration
2057 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2060 # Always depend on global verification
2061 depends_fn = lambda: [(-len(jobs), [])]
2064 [opcodes.OpClusterVerifyGroup(group_name=group,
2065 ignore_errors=self.op.ignore_errors,
2066 depends=depends_fn())]
2067 for group in groups)
2069 # Fix up all parameters
2070 for op in itertools.chain(*jobs): # pylint: disable=W0142
2071 op.debug_simulate_errors = self.op.debug_simulate_errors
2072 op.verbose = self.op.verbose
2073 op.error_codes = self.op.error_codes
2075 op.skip_checks = self.op.skip_checks
2076 except AttributeError:
2077 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2079 return ResultWithJobs(jobs)
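# Sketch of the submitted jobs for two hypothetical groups "default" and
# "other" (opcode arguments abbreviated): each group verification depends,
# via a relative (negative) job ID, on the configuration verification job
# submitted first in the same batch.
#
#   jobs = [
#     [OpClusterVerifyConfig(...)],
#     [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#     [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])], ...)],
#   ]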
2082 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2083 """Verifies the cluster config.
2088 def _VerifyHVP(self, hvp_data):
2089 """Verifies locally the syntax of the hypervisor parameters.
2092 for item, hv_name, hv_params in hvp_data:
2093 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2096 hv_class = hypervisor.GetHypervisorClass(hv_name)
2097 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2098 hv_class.CheckParameterSyntax(hv_params)
2099 except errors.GenericError, err:
2100 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2102 def ExpandNames(self):
2103 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2104 self.share_locks = _ShareAll()
2106 def CheckPrereq(self):
2107 """Check prerequisites.
2110 # Retrieve all information
2111 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2112 self.all_node_info = self.cfg.GetAllNodesInfo()
2113 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2115 def Exec(self, feedback_fn):
2116 """Verify integrity of cluster, performing various test on nodes.
2120 self._feedback_fn = feedback_fn
2122 feedback_fn("* Verifying cluster config")
2124 for msg in self.cfg.VerifyConfig():
2125 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2127 feedback_fn("* Verifying cluster certificate files")
2129 for cert_filename in pathutils.ALL_CERT_FILES:
2130 (errcode, msg) = _VerifyCertificate(cert_filename)
2131 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2133 feedback_fn("* Verifying hypervisor parameters")
2135 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2136 self.all_inst_info.values()))
2138 feedback_fn("* Verifying all nodes belong to an existing group")
2140 # We do this verification here because, should this bogus circumstance
2141 # occur, it would never be caught by VerifyGroup, which only acts on
2142 # nodes/instances reachable from existing node groups.
2144 dangling_nodes = set(node.name for node in self.all_node_info.values()
2145 if node.group not in self.all_group_info)
2147 dangling_instances = {}
2148 no_node_instances = []
2150 for inst in self.all_inst_info.values():
2151 if inst.primary_node in dangling_nodes:
2152 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2153 elif inst.primary_node not in self.all_node_info:
2154 no_node_instances.append(inst.name)
2159 utils.CommaJoin(dangling_instances.get(node.name,
2161 for node in dangling_nodes]
2163 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2165 "the following nodes (and their instances) belong to a non"
2166 " existing group: %s", utils.CommaJoin(pretty_dangling))
2168 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2170 "the following instances have a non-existing primary-node:"
2171 " %s", utils.CommaJoin(no_node_instances))
2176 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2177 """Verifies the status of a node group.
2180 HPATH = "cluster-verify"
2181 HTYPE = constants.HTYPE_CLUSTER
2184 _HOOKS_INDENT_RE = re.compile("^", re.M)
2186 class NodeImage(object):
2187 """A class representing the logical and physical status of a node.
2190 @ivar name: the node name to which this object refers
2191 @ivar volumes: a structure as returned from
2192 L{ganeti.backend.GetVolumeList} (runtime)
2193 @ivar instances: a list of running instances (runtime)
2194 @ivar pinst: list of configured primary instances (config)
2195 @ivar sinst: list of configured secondary instances (config)
2196 @ivar sbp: dictionary of {primary-node: list of instances} for all
2197 instances for which this node is secondary (config)
2198 @ivar mfree: free memory, as reported by hypervisor (runtime)
2199 @ivar dfree: free disk, as reported by the node (runtime)
2200 @ivar offline: the offline status (config)
2201 @type rpc_fail: boolean
2202 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2203 not whether the individual keys were correct) (runtime)
2204 @type lvm_fail: boolean
2205 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2206 @type hyp_fail: boolean
2207 @ivar hyp_fail: whether the RPC call didn't return the instance list
2208 @type ghost: boolean
2209 @ivar ghost: whether this node is unknown to the configuration, i.e. a ghost node (config)
2210 @type os_fail: boolean
2211 @ivar os_fail: whether the RPC call didn't return valid OS data
2213 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2214 @type vm_capable: boolean
2215 @ivar vm_capable: whether the node can host instances
2218 def __init__(self, offline=False, name=None, vm_capable=True):
2227 self.offline = offline
2228 self.vm_capable = vm_capable
2229 self.rpc_fail = False
2230 self.lvm_fail = False
2231 self.hyp_fail = False
2233 self.os_fail = False
2236 def ExpandNames(self):
2237 # This raises errors.OpPrereqError on its own:
2238 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2240 # Get instances in node group; this is unsafe and needs verification later
2242 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2244 self.needed_locks = {
2245 locking.LEVEL_INSTANCE: inst_names,
2246 locking.LEVEL_NODEGROUP: [self.group_uuid],
2247 locking.LEVEL_NODE: [],
2249 # This opcode is run by the watcher every five minutes and acquires all nodes
2250 # for a group. It doesn't run for a long time, so it's better to acquire
2251 # the node allocation lock as well.
2252 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2255 self.share_locks = _ShareAll()
2257 def DeclareLocks(self, level):
2258 if level == locking.LEVEL_NODE:
2259 # Get members of node group; this is unsafe and needs verification later
2260 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2262 all_inst_info = self.cfg.GetAllInstancesInfo()
2264 # In Exec(), we warn about mirrored instances that have primary and
2265 # secondary living in separate node groups. To fully verify that
2266 # volumes for these instances are healthy, we will need to do an
2267 # extra call to their secondaries. We ensure here those nodes will
2269 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2270 # Important: access only the instances whose lock is owned
2271 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2272 nodes.update(all_inst_info[inst].secondary_nodes)
2274 self.needed_locks[locking.LEVEL_NODE] = nodes
2276 def CheckPrereq(self):
2277 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2278 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2280 group_nodes = set(self.group_info.members)
2282 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2285 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2287 unlocked_instances = \
2288 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2291 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2292 utils.CommaJoin(unlocked_nodes),
2295 if unlocked_instances:
2296 raise errors.OpPrereqError("Missing lock for instances: %s" %
2297 utils.CommaJoin(unlocked_instances),
2300 self.all_node_info = self.cfg.GetAllNodesInfo()
2301 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2303 self.my_node_names = utils.NiceSort(group_nodes)
2304 self.my_inst_names = utils.NiceSort(group_instances)
2306 self.my_node_info = dict((name, self.all_node_info[name])
2307 for name in self.my_node_names)
2309 self.my_inst_info = dict((name, self.all_inst_info[name])
2310 for name in self.my_inst_names)
2312 # We detect here the nodes that will need the extra RPC calls for verifying
2313 # split LV volumes; they should be locked.
2314 extra_lv_nodes = set()
2316 for inst in self.my_inst_info.values():
2317 if inst.disk_template in constants.DTS_INT_MIRROR:
2318 for nname in inst.all_nodes:
2319 if self.all_node_info[nname].group != self.group_uuid:
2320 extra_lv_nodes.add(nname)
2322 unlocked_lv_nodes = \
2323 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2325 if unlocked_lv_nodes:
2326 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2327 utils.CommaJoin(unlocked_lv_nodes),
2329 self.extra_lv_nodes = list(extra_lv_nodes)
2331 def _VerifyNode(self, ninfo, nresult):
2332 """Perform some basic validation on data returned from a node.
2334 - check the result data structure is well formed and has all the
2336 - check ganeti version
2338 @type ninfo: L{objects.Node}
2339 @param ninfo: the node to check
2340 @param nresult: the results from the node
2342 @return: whether overall this call was successful (and we can expect
2343 reasonable values in the response)
2347 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2349 # main result, nresult should be a non-empty dict
2350 test = not nresult or not isinstance(nresult, dict)
2351 _ErrorIf(test, constants.CV_ENODERPC, node,
2352 "unable to verify node: no data returned")
2356 # compares ganeti version
2357 local_version = constants.PROTOCOL_VERSION
2358 remote_version = nresult.get("version", None)
2359 test = not (remote_version and
2360 isinstance(remote_version, (list, tuple)) and
2361 len(remote_version) == 2)
2362 _ErrorIf(test, constants.CV_ENODERPC, node,
2363 "connection to node returned invalid data")
2367 test = local_version != remote_version[0]
2368 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2369 "incompatible protocol versions: master %s,"
2370 " node %s", local_version, remote_version[0])
2374 # node seems compatible, we can actually try to look into its results
2376 # full package version
2377 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2378 constants.CV_ENODEVERSION, node,
2379 "software version mismatch: master %s, node %s",
2380 constants.RELEASE_VERSION, remote_version[1],
2381 code=self.ETYPE_WARNING)
2383 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2384 if ninfo.vm_capable and isinstance(hyp_result, dict):
2385 for hv_name, hv_result in hyp_result.iteritems():
2386 test = hv_result is not None
2387 _ErrorIf(test, constants.CV_ENODEHV, node,
2388 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2390 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2391 if ninfo.vm_capable and isinstance(hvp_result, list):
2392 for item, hv_name, hv_result in hvp_result:
2393 _ErrorIf(True, constants.CV_ENODEHV, node,
2394 "hypervisor %s parameter verify failure (source %s): %s",
2395 hv_name, item, hv_result)
2397 test = nresult.get(constants.NV_NODESETUP,
2398 ["Missing NODESETUP results"])
2399 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2404 def _VerifyNodeTime(self, ninfo, nresult,
2405 nvinfo_starttime, nvinfo_endtime):
2406 """Check the node time.
2408 @type ninfo: L{objects.Node}
2409 @param ninfo: the node to check
2410 @param nresult: the remote results for the node
2411 @param nvinfo_starttime: the start time of the RPC call
2412 @param nvinfo_endtime: the end time of the RPC call
2416 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418 ntime = nresult.get(constants.NV_TIME, None)
2420 ntime_merged = utils.MergeTime(ntime)
2421 except (ValueError, TypeError):
2422 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2425 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2426 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2427 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2428 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2432 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2433 "Node time diverges by at least %s from master node time",
2436 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2437 """Check the node LVM results.
2439 @type ninfo: L{objects.Node}
2440 @param ninfo: the node to check
2441 @param nresult: the remote results for the node
2442 @param vg_name: the configured VG name
2449 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2451 # checks vg existence and size > 20G
2452 vglist = nresult.get(constants.NV_VGLIST, None)
2454 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2456 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2457 constants.MIN_VG_SIZE)
2458 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2460 # check pv names (and possibly sizes)
2461 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
2462 test = pvlist_dict is None
2463 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2465 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
2466 # check that ':' is not present in PV names, since it's a
2467 # special character for lvcreate (denotes the range of PEs to
2470 test = ":" in pv.name
2471 _ErrorIf(test, constants.CV_ENODELVM, node,
2472 "Invalid character ':' in PV '%s' of VG '%s'",
2473 pv.name, pv.vg_name)
2475 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2476 """Check the node bridges.
2478 @type ninfo: L{objects.Node}
2479 @param ninfo: the node to check
2480 @param nresult: the remote results for the node
2481 @param bridges: the expected list of bridges
2488 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2490 missing = nresult.get(constants.NV_BRIDGES, None)
2491 test = not isinstance(missing, list)
2492 _ErrorIf(test, constants.CV_ENODENET, node,
2493 "did not return valid bridge information")
2495 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2496 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2498 def _VerifyNodeUserScripts(self, ninfo, nresult):
2499 """Check the results of user scripts presence and executability on the node
2501 @type ninfo: L{objects.Node}
2502 @param ninfo: the node to check
2503 @param nresult: the remote results for the node
2508 test = constants.NV_USERSCRIPTS not in nresult
2509 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2510 "did not return user scripts information")
2512 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2514 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2515 "user scripts not present or not executable: %s" %
2516 utils.CommaJoin(sorted(broken_scripts)))
2518 def _VerifyNodeNetwork(self, ninfo, nresult):
2519 """Check the node network connectivity results.
2521 @type ninfo: L{objects.Node}
2522 @param ninfo: the node to check
2523 @param nresult: the remote results for the node
2527 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2529 test = constants.NV_NODELIST not in nresult
2530 _ErrorIf(test, constants.CV_ENODESSH, node,
2531 "node hasn't returned node ssh connectivity data")
2533 if nresult[constants.NV_NODELIST]:
2534 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2535 _ErrorIf(True, constants.CV_ENODESSH, node,
2536 "ssh communication with node '%s': %s", a_node, a_msg)
2538 test = constants.NV_NODENETTEST not in nresult
2539 _ErrorIf(test, constants.CV_ENODENET, node,
2540 "node hasn't returned node tcp connectivity data")
2542 if nresult[constants.NV_NODENETTEST]:
2543 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2545 _ErrorIf(True, constants.CV_ENODENET, node,
2546 "tcp communication with node '%s': %s",
2547 anode, nresult[constants.NV_NODENETTEST][anode])
2549 test = constants.NV_MASTERIP not in nresult
2550 _ErrorIf(test, constants.CV_ENODENET, node,
2551 "node hasn't returned node master IP reachability data")
2553 if not nresult[constants.NV_MASTERIP]:
2554 if node == self.master_node:
2555 msg = "the master node cannot reach the master IP (not configured?)"
2557 msg = "cannot reach the master IP"
2558 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2560 def _VerifyInstance(self, instance, instanceconfig, node_image,
2562 """Verify an instance.
2564 This function checks whether the required block devices are
2565 available on the instance's nodes.
2568 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2569 node_current = instanceconfig.primary_node
2571 node_vol_should = {}
2572 instanceconfig.MapLVsByNode(node_vol_should)
2574 cluster = self.cfg.GetClusterInfo()
2575 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2577 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2578 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2579 code=self.ETYPE_WARNING)
2581 for node in node_vol_should:
2582 n_img = node_image[node]
2583 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2584 # ignore missing volumes on offline or broken nodes
2586 for volume in node_vol_should[node]:
2587 test = volume not in n_img.volumes
2588 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2589 "volume %s missing on node %s", volume, node)
2591 if instanceconfig.admin_state == constants.ADMINST_UP:
2592 pri_img = node_image[node_current]
2593 test = instance not in pri_img.instances and not pri_img.offline
2594 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2595 "instance not running on its primary node %s",
2598 diskdata = [(nname, success, status, idx)
2599 for (nname, disks) in diskstatus.items()
2600 for idx, (success, status) in enumerate(disks)]
2602 for nname, success, bdev_status, idx in diskdata:
2603 # the 'ghost node' construction in Exec() ensures that we have a
2605 snode = node_image[nname]
2606 bad_snode = snode.ghost or snode.offline
2607 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2608 not success and not bad_snode,
2609 constants.CV_EINSTANCEFAULTYDISK, instance,
2610 "couldn't retrieve status for disk/%s on %s: %s",
2611 idx, nname, bdev_status)
2612 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2613 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2614 constants.CV_EINSTANCEFAULTYDISK, instance,
2615 "disk/%s on %s is faulty", idx, nname)
2617 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2618 """Verify if there are any unknown volumes in the cluster.
2620 The .os, .swap and backup volumes are ignored. All other volumes are
2621 reported as unknown.
2623 @type reserved: L{ganeti.utils.FieldSet}
2624 @param reserved: a FieldSet of reserved volume names
2627 for node, n_img in node_image.items():
2628 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2629 self.all_node_info[node].group != self.group_uuid):
2630 # skip non-healthy nodes
2632 for volume in n_img.volumes:
2633 test = ((node not in node_vol_should or
2634 volume not in node_vol_should[node]) and
2635 not reserved.Matches(volume))
2636 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2637 "volume %s is unknown", volume)
2639 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2640 """Verify N+1 Memory Resilience.
2642 Check that if one single node dies we can still start all the
2643 instances it was primary for.
2646 cluster_info = self.cfg.GetClusterInfo()
2647 for node, n_img in node_image.items():
2648 # This code checks that every node which is now listed as
2649 # secondary has enough memory to host all instances it would need
2650 # to take over, should a single other node in the cluster fail.
2651 # FIXME: not ready for failover to an arbitrary node
2652 # FIXME: does not support file-backed instances
2653 # WARNING: we currently take into account down instances as well
2654 # as up ones, considering that even if they're down someone
2655 # might want to start them even in the event of a node failure.
2656 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2657 # we're skipping nodes marked offline and nodes in other groups from
2658 # the N+1 warning, since most likely we don't have good memory
2659 # information from them; we already list instances living on such
2660 # nodes, and that's enough warning
2662 #TODO(dynmem): also consider ballooning out other instances
2663 for prinode, instances in n_img.sbp.items():
2665 for instance in instances:
2666 bep = cluster_info.FillBE(instance_cfg[instance])
2667 if bep[constants.BE_AUTO_BALANCE]:
2668 needed_mem += bep[constants.BE_MINMEM]
2669 test = n_img.mfree < needed_mem
2670 self._ErrorIf(test, constants.CV_ENODEN1, node,
2671 "not enough memory to accomodate instance failovers"
2672 " should node %s fail (%dMiB needed, %dMiB available)",
2673 prinode, needed_mem, n_img.mfree)
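# Worked example with hypothetical numbers: a node that is secondary for two
# auto-balanced instances of the same primary, with BE_MINMEM of 1024 and
# 2048 MiB, needs 3072 MiB; with mfree == 2500 MiB the check fails:
#
#   needed_mem = 1024 + 2048          # 3072 MiB
#   test = n_img.mfree < needed_mem   # 2500 < 3072 -> CV_ENODEN1 reported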
2676 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2677 (files_all, files_opt, files_mc, files_vm)):
2678 """Verifies file checksums collected from all nodes.
2680 @param errorif: Callback for reporting errors
2681 @param nodeinfo: List of L{objects.Node} objects
2682 @param master_node: Name of master node
2683 @param all_nvinfo: RPC results
2686 # Define functions determining which nodes to consider for a file
2689 (files_mc, lambda node: (node.master_candidate or
2690 node.name == master_node)),
2691 (files_vm, lambda node: node.vm_capable),
2694 # Build mapping from filename to list of nodes which should have the file
2696 for (files, fn) in files2nodefn:
2698 filenodes = nodeinfo
2700 filenodes = filter(fn, nodeinfo)
2701 nodefiles.update((filename,
2702 frozenset(map(operator.attrgetter("name"), filenodes)))
2703 for filename in files)
2705 assert set(nodefiles) == (files_all | files_mc | files_vm)
2707 fileinfo = dict((filename, {}) for filename in nodefiles)
2708 ignore_nodes = set()
2710 for node in nodeinfo:
2712 ignore_nodes.add(node.name)
2715 nresult = all_nvinfo[node.name]
2717 if nresult.fail_msg or not nresult.payload:
2720 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2721 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2722 for (key, value) in fingerprints.items())
2725 test = not (node_files and isinstance(node_files, dict))
2726 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2727 "Node did not return file checksum data")
2729 ignore_nodes.add(node.name)
2732 # Build per-checksum mapping from filename to nodes having it
2733 for (filename, checksum) in node_files.items():
2734 assert filename in nodefiles
2735 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2737 for (filename, checksums) in fileinfo.items():
2738 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2740 # Nodes having the file
2741 with_file = frozenset(node_name
2742 for nodes in fileinfo[filename].values()
2743 for node_name in nodes) - ignore_nodes
2745 expected_nodes = nodefiles[filename] - ignore_nodes
2747 # Nodes missing file
2748 missing_file = expected_nodes - with_file
2750 if filename in files_opt:
2752 errorif(missing_file and missing_file != expected_nodes,
2753 constants.CV_ECLUSTERFILECHECK, None,
2754 "File %s is optional, but it must exist on all or no"
2755 " nodes (not found on %s)",
2756 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2758 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2759 "File %s is missing from node(s) %s", filename,
2760 utils.CommaJoin(utils.NiceSort(missing_file)))
2762 # Warn if a node has a file it shouldn't
2763 unexpected = with_file - expected_nodes
2765 constants.CV_ECLUSTERFILECHECK, None,
2766 "File %s should not exist on node(s) %s",
2767 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2769 # See if there are multiple versions of the file
2770 test = len(checksums) > 1
2772 variants = ["variant %s on %s" %
2773 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2774 for (idx, (checksum, nodes)) in
2775 enumerate(sorted(checksums.items()))]
2779 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2780 "File %s found with %s different checksums (%s)",
2781 filename, len(checksums), "; ".join(variants))
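# For illustration, with hypothetical nodes: if /etc/hosts were found with
# two different checksums, the resulting message would read roughly:
#
#   File /etc/hosts found with 2 different checksums
#   (variant 1 on node1; variant 2 on node2, node3)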
2783 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2785 """Verifies and the node DRBD status.
2787 @type ninfo: L{objects.Node}
2788 @param ninfo: the node to check
2789 @param nresult: the remote results for the node
2790 @param instanceinfo: the dict of instances
2791 @param drbd_helper: the configured DRBD usermode helper
2792 @param drbd_map: the DRBD map as returned by
2793 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2797 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2800 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2801 test = (helper_result is None)
2802 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2803 "no drbd usermode helper returned")
2805 status, payload = helper_result
2807 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2808 "drbd usermode helper check unsuccessful: %s", payload)
2809 test = status and (payload != drbd_helper)
2810 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2811 "wrong drbd usermode helper: %s", payload)
2813 # compute the DRBD minors
2815 for minor, instance in drbd_map[node].items():
2816 test = instance not in instanceinfo
2817 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2818 "ghost instance '%s' in temporary DRBD map", instance)
2819 # ghost instance should not be running, but otherwise we
2820 # don't give double warnings (both ghost instance and
2821 # unallocated minor in use)
2823 node_drbd[minor] = (instance, False)
2825 instance = instanceinfo[instance]
2826 node_drbd[minor] = (instance.name,
2827 instance.admin_state == constants.ADMINST_UP)
2829 # and now check them
2830 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2831 test = not isinstance(used_minors, (tuple, list))
2832 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2833 "cannot parse drbd status file: %s", str(used_minors))
2835 # we cannot check drbd status
2838 for minor, (iname, must_exist) in node_drbd.items():
2839 test = minor not in used_minors and must_exist
2840 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2841 "drbd minor %d of instance %s is not active", minor, iname)
2842 for minor in used_minors:
2843 test = minor not in node_drbd
2844 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2845 "unallocated drbd minor %d is in use", minor)
2847 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2848 """Builds the node OS structures.
2850 @type ninfo: L{objects.Node}
2851 @param ninfo: the node to check
2852 @param nresult: the remote results for the node
2853 @param nimg: the node image object
2857 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2859 remote_os = nresult.get(constants.NV_OSLIST, None)
2860 test = (not isinstance(remote_os, list) or
2861 not compat.all(isinstance(v, list) and len(v) == 7
2862 for v in remote_os))
2864 _ErrorIf(test, constants.CV_ENODEOS, node,
2865 "node hasn't returned valid OS data")
2874 for (name, os_path, status, diagnose,
2875 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2877 if name not in os_dict:
2880 # parameters is a list of lists instead of list of tuples due to
2881 # JSON lacking a real tuple type, fix it:
2882 parameters = [tuple(v) for v in parameters]
2883 os_dict[name].append((os_path, status, diagnose,
2884 set(variants), set(parameters), set(api_ver)))
2886 nimg.oslist = os_dict
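# Shape of the resulting structure for a hypothetical OS reported once by
# the node (path, status, diagnose message, variants, parameters, API
# versions); an OS name with several entries was found in more than one
# directory and only the first entry is used:
#
#   nimg.oslist = {
#     "debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                      set(["default"]), set(), set([20]))],
#   }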
2888 def _VerifyNodeOS(self, ninfo, nimg, base):
2889 """Verifies the node OS list.
2891 @type ninfo: L{objects.Node}
2892 @param ninfo: the node to check
2893 @param nimg: the node image object
2894 @param base: the 'template' node we match against (e.g. from the master)
2898 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2900 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2902 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2903 for os_name, os_data in nimg.oslist.items():
2904 assert os_data, "Empty OS status for OS %s?!" % os_name
2905 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2906 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2907 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2908 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2909 "OS '%s' has multiple entries (first one shadows the rest): %s",
2910 os_name, utils.CommaJoin([v[0] for v in os_data]))
2911 # comparisons with the 'base' image
2912 test = os_name not in base.oslist
2913 _ErrorIf(test, constants.CV_ENODEOS, node,
2914 "Extra OS %s not present on reference node (%s)",
2918 assert base.oslist[os_name], "Base node has empty OS status?"
2919 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2921 # base OS is invalid, skipping
2923 for kind, a, b in [("API version", f_api, b_api),
2924 ("variants list", f_var, b_var),
2925 ("parameters", beautify_params(f_param),
2926 beautify_params(b_param))]:
2927 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2928 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2929 kind, os_name, base.name,
2930 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2932 # check any missing OSes
2933 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2934 _ErrorIf(missing, constants.CV_ENODEOS, node,
2935 "OSes present on reference node %s but missing on this node: %s",
2936 base.name, utils.CommaJoin(missing))
2938 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2939 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2941 @type ninfo: L{objects.Node}
2942 @param ninfo: the node to check
2943 @param nresult: the remote results for the node
2944 @type is_master: bool
2945 @param is_master: Whether node is the master node
2951 (constants.ENABLE_FILE_STORAGE or
2952 constants.ENABLE_SHARED_FILE_STORAGE)):
2954 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2956 # This should never happen
2957 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2958 "Node did not return forbidden file storage paths")
2960 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2961 "Found forbidden file storage paths: %s",
2962 utils.CommaJoin(fspaths))
2964 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2965 constants.CV_ENODEFILESTORAGEPATHS, node,
2966 "Node should not have returned forbidden file storage"
2969 def _VerifyOob(self, ninfo, nresult):
2970 """Verifies out of band functionality of a node.
2972 @type ninfo: L{objects.Node}
2973 @param ninfo: the node to check
2974 @param nresult: the remote results for the node
2978 # We just have to verify the paths on master and/or master candidates
2979 # as the oob helper is invoked on the master
2980 if ((ninfo.master_candidate or ninfo.master_capable) and
2981 constants.NV_OOB_PATHS in nresult):
2982 for path_result in nresult[constants.NV_OOB_PATHS]:
2983 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2985 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2986 """Verifies and updates the node volume data.
2988 This function will update a L{NodeImage}'s internal structures
2989 with data from the remote call.
2991 @type ninfo: L{objects.Node}
2992 @param ninfo: the node to check
2993 @param nresult: the remote results for the node
2994 @param nimg: the node image object
2995 @param vg_name: the configured VG name
2999 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3001 nimg.lvm_fail = True
3002 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3005 elif isinstance(lvdata, basestring):
3006 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3007 utils.SafeEncode(lvdata))
3008 elif not isinstance(lvdata, dict):
3009 _ErrorIf(True, constants.CV_ENODELVM, node,
3010 "rpc call to node failed (lvlist)")
3012 nimg.volumes = lvdata
3013 nimg.lvm_fail = False
3015 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3016 """Verifies and updates the node instance list.
3018 If the listing was successful, then updates this node's instance
3019 list. Otherwise, it marks the RPC call as failed for the instance
3022 @type ninfo: L{objects.Node}
3023 @param ninfo: the node to check
3024 @param nresult: the remote results for the node
3025 @param nimg: the node image object
3028 idata = nresult.get(constants.NV_INSTANCELIST, None)
3029 test = not isinstance(idata, list)
3030 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3031 "rpc call to node failed (instancelist): %s",
3032 utils.SafeEncode(str(idata)))
3034 nimg.hyp_fail = True
3036 nimg.instances = idata
3038 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3039 """Verifies and computes a node information map
3041 @type ninfo: L{objects.Node}
3042 @param ninfo: the node to check
3043 @param nresult: the remote results for the node
3044 @param nimg: the node image object
3045 @param vg_name: the configured VG name
3049 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3051 # try to read free memory (from the hypervisor)
3052 hv_info = nresult.get(constants.NV_HVINFO, None)
3053 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3054 _ErrorIf(test, constants.CV_ENODEHV, node,
3055 "rpc call to node failed (hvinfo)")
3058 nimg.mfree = int(hv_info["memory_free"])
3059 except (ValueError, TypeError):
3060 _ErrorIf(True, constants.CV_ENODERPC, node,
3061 "node returned invalid nodeinfo, check hypervisor")
3063 # FIXME: devise a free space model for file based instances as well
3064 if vg_name is not None:
3065 test = (constants.NV_VGLIST not in nresult or
3066 vg_name not in nresult[constants.NV_VGLIST])
3067 _ErrorIf(test, constants.CV_ENODELVM, node,
3068 "node didn't return data for the volume group '%s'"
3069 " - it is either missing or broken", vg_name)
3072 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3073 except (ValueError, TypeError):
3074 _ErrorIf(True, constants.CV_ENODERPC, node,
3075 "node returned invalid LVM info, check LVM status")
3077 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3078 """Gets per-disk status information for all instances.
3080 @type nodelist: list of strings
3081 @param nodelist: Node names
3082 @type node_image: dict of (name, L{objects.Node})
3083 @param node_image: Node objects
3084 @type instanceinfo: dict of (name, L{objects.Instance})
3085 @param instanceinfo: Instance objects
3086 @rtype: {instance: {node: [(success, payload)]}}
3087 @return: a dictionary of per-instance dictionaries with nodes as
3088 keys and disk information as values; the disk information is a
3089 list of tuples (success, payload)
3092 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3095 node_disks_devonly = {}
3096 diskless_instances = set()
3097 diskless = constants.DT_DISKLESS
3099 for nname in nodelist:
3100 node_instances = list(itertools.chain(node_image[nname].pinst,
3101 node_image[nname].sinst))
3102 diskless_instances.update(inst for inst in node_instances
3103 if instanceinfo[inst].disk_template == diskless)
3104 disks = [(inst, disk)
3105 for inst in node_instances
3106 for disk in instanceinfo[inst].disks]
3109 # No need to collect data
3112 node_disks[nname] = disks
3114 # _AnnotateDiskParams makes already copies of the disks
3116 for (inst, dev) in disks:
3117 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3118 self.cfg.SetDiskID(anno_disk, nname)
3119 devonly.append(anno_disk)
3121 node_disks_devonly[nname] = devonly
3123 assert len(node_disks) == len(node_disks_devonly)
3125 # Collect data from all nodes with disks
3126 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3129 assert len(result) == len(node_disks)
3133 for (nname, nres) in result.items():
3134 disks = node_disks[nname]
3137 # No data from this node
3138 data = len(disks) * [(False, "node offline")]
3141 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3142 "while getting disk information: %s", msg)
3144 # No data from this node
3145 data = len(disks) * [(False, msg)]
3148 for idx, i in enumerate(nres.payload):
3149 if isinstance(i, (tuple, list)) and len(i) == 2:
3152 logging.warning("Invalid result from node %s, entry %d: %s",
3154 data.append((False, "Invalid result from the remote node"))
3156 for ((inst, _), status) in zip(disks, data):
3157 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3159 # Add empty entries for diskless instances.
3160 for inst in diskless_instances:
3161 assert inst not in instdisk
3164 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3165 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3166 compat.all(isinstance(s, (tuple, list)) and
3167 len(s) == 2 for s in statuses)
3168 for inst, nnames in instdisk.items()
3169 for nname, statuses in nnames.items())
3171 instdisk_keys = set(instdisk)
3172 instanceinfo_keys = set(instanceinfo)
3173 assert instdisk_keys == instanceinfo_keys, \
3174 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3175 (instdisk_keys, instanceinfo_keys))
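# Shape of the returned mapping, shown with hypothetical names: a DRBD
# instance "inst1" with one disk on primary "node1" and secondary "node2"
# (the payload is whatever call_blockdev_getmirrorstatus_multi returned):
#
#   instdisk = {
#     "inst1": {
#       "node1": [(True, <block device status>)],
#       "node2": [(False, "node offline")],
#     },
#   }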
3180 def _SshNodeSelector(group_uuid, all_nodes):
3181 """Create endless iterators for all potential SSH check hosts.
3184 nodes = [node for node in all_nodes
3185 if (node.group != group_uuid and
3187 keyfunc = operator.attrgetter("group")
3189 return map(itertools.cycle,
3190 [sorted(map(operator.attrgetter("name"), names))
3191 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3195 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3196 """Choose which nodes should talk to which other nodes.
3198 We will make nodes contact all nodes in their group, and one node from
3201 @warning: This algorithm has a known issue if one node group is much
3202 smaller than others (e.g. just one node). In such a case all other
3203 nodes will talk to the single node.
3206 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3207 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3209 return (online_nodes,
3210 dict((name, sorted([i.next() for i in sel]))
3211 for name in online_nodes))
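# Example with hypothetical node names: for this group {"n1", "n2"} and one
# other group {"m1", "m2"}, every node probes all online nodes of its own
# group (first element) plus the extra per-node selection (second element):
#
#   (["n1", "n2"],
#    {"n1": ["m1"],
#     "n2": ["m2"]})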
3213 def BuildHooksEnv(self):
3216 Cluster-Verify hooks run only in the post phase; if they fail, their
3217 output is logged in the verify output and the verification fails.
3221 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3224 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3225 for node in self.my_node_info.values())
3229 def BuildHooksNodes(self):
3230 """Build hooks nodes.
3233 return ([], self.my_node_names)
3235 def Exec(self, feedback_fn):
3236 """Verify integrity of the node group, performing various test on nodes.
3239 # This method has too many local variables. pylint: disable=R0914
3240 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3242 if not self.my_node_names:
3244 feedback_fn("* Empty node group, skipping verification")
3248 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3249 verbose = self.op.verbose
3250 self._feedback_fn = feedback_fn
3252 vg_name = self.cfg.GetVGName()
3253 drbd_helper = self.cfg.GetDRBDHelper()
3254 cluster = self.cfg.GetClusterInfo()
3255 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3256 hypervisors = cluster.enabled_hypervisors
3257 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3259 i_non_redundant = [] # Non redundant instances
3260 i_non_a_balanced = [] # Non auto-balanced instances
3261 i_offline = 0 # Count of offline instances
3262 n_offline = 0 # Count of offline nodes
3263 n_drained = 0 # Count of nodes being drained
3264 node_vol_should = {}
3266 # FIXME: verify OS list
3269 filemap = _ComputeAncillaryFiles(cluster, False)
3271 # do local checksums
3272 master_node = self.master_node = self.cfg.GetMasterNode()
3273 master_ip = self.cfg.GetMasterIP()
3275 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3278 if self.cfg.GetUseExternalMipScript():
3279 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3281 node_verify_param = {
3282 constants.NV_FILELIST:
3283 map(vcluster.MakeVirtualPath,
3284 utils.UniqueSequence(filename
3285 for files in filemap
3286 for filename in files)),
3287 constants.NV_NODELIST:
3288 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3289 self.all_node_info.values()),
3290 constants.NV_HYPERVISOR: hypervisors,
3291 constants.NV_HVPARAMS:
3292 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3293 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3294 for node in node_data_list
3295 if not node.offline],
3296 constants.NV_INSTANCELIST: hypervisors,
3297 constants.NV_VERSION: None,
3298 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3299 constants.NV_NODESETUP: None,
3300 constants.NV_TIME: None,
3301 constants.NV_MASTERIP: (master_node, master_ip),
3302 constants.NV_OSLIST: None,
3303 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3304 constants.NV_USERSCRIPTS: user_scripts,
3307 if vg_name is not None:
3308 node_verify_param[constants.NV_VGLIST] = None
3309 node_verify_param[constants.NV_LVLIST] = vg_name
3310 node_verify_param[constants.NV_PVLIST] = [vg_name]
3313 node_verify_param[constants.NV_DRBDLIST] = None
3314 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3316 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3317 # Load file storage paths only from master node
3318 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3321 # FIXME: this needs to be changed per node-group, not cluster-wide
3323 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3324 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3325 bridges.add(default_nicpp[constants.NIC_LINK])
3326 for instance in self.my_inst_info.values():
3327 for nic in instance.nics:
3328 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3329 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3330 bridges.add(full_nic[constants.NIC_LINK])
3333 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3335 # Build our expected cluster state
3336 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3338 vm_capable=node.vm_capable))
3339 for node in node_data_list)
3343 for node in self.all_node_info.values():
3344 path = _SupportsOob(self.cfg, node)
3345 if path and path not in oob_paths:
3346 oob_paths.append(path)
3349 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3351 for instance in self.my_inst_names:
3352 inst_config = self.my_inst_info[instance]
3353 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3356 for nname in inst_config.all_nodes:
3357 if nname not in node_image:
3358 gnode = self.NodeImage(name=nname)
3359 gnode.ghost = (nname not in self.all_node_info)
3360 node_image[nname] = gnode
3362 inst_config.MapLVsByNode(node_vol_should)
3364 pnode = inst_config.primary_node
3365 node_image[pnode].pinst.append(instance)
3367 for snode in inst_config.secondary_nodes:
3368 nimg = node_image[snode]
3369 nimg.sinst.append(instance)
3370 if pnode not in nimg.sbp:
3371 nimg.sbp[pnode] = []
3372 nimg.sbp[pnode].append(instance)
3374 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3376 # The value of exclusive_storage should be the same across the group
3377 if compat.any(es_flags.values()):
3378 es_unset_nodes = [n for (n, es) in es_flags.items()
3382 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3383 "The exclusive_storage flag should be uniform in a group,"
3384 " but these nodes have it unset: %s",
3385 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3386 self.LogWarning("Some checks required by exclusive storage will be"
3387 " performed also on nodes with the flag unset")
3389 # At this point, we have the in-memory data structures complete,
3390 # except for the runtime information, which we'll gather next
3392 # Due to the way our RPC system works, exact response times cannot be
3393 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3394 # time before and after executing the request, we can at least have a time
3396 nvinfo_starttime = time.time()
3397 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3399 self.cfg.GetClusterName())
3400 nvinfo_endtime = time.time()
3402 if self.extra_lv_nodes and vg_name is not None:
3404 self.rpc.call_node_verify(self.extra_lv_nodes,
3405 {constants.NV_LVLIST: vg_name},
3406 self.cfg.GetClusterName())
3408 extra_lv_nvinfo = {}
3410 all_drbd_map = self.cfg.ComputeDRBDMap()
3412 feedback_fn("* Gathering disk information (%s nodes)" %
3413 len(self.my_node_names))
3414 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3417 feedback_fn("* Verifying configuration file consistency")
3419 # If not all nodes are being checked, we need to make sure the master node
3420 # and a non-checked vm_capable node are in the list.
3421 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3423 vf_nvinfo = all_nvinfo.copy()
3424 vf_node_info = list(self.my_node_info.values())
3425 additional_nodes = []
3426 if master_node not in self.my_node_info:
3427 additional_nodes.append(master_node)
3428 vf_node_info.append(self.all_node_info[master_node])
3429 # Add the first vm_capable node we find which is not included,
3430 # excluding the master node (which we already have)
3431 for node in absent_nodes:
3432 nodeinfo = self.all_node_info[node]
3433 if (nodeinfo.vm_capable and not nodeinfo.offline and
3434 node != master_node):
3435 additional_nodes.append(node)
3436 vf_node_info.append(self.all_node_info[node])
3438 key = constants.NV_FILELIST
3439 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3440 {key: node_verify_param[key]},
3441 self.cfg.GetClusterName()))
3443 vf_nvinfo = all_nvinfo
3444 vf_node_info = self.my_node_info.values()
3446 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3448 feedback_fn("* Verifying node status")
3452 for node_i in node_data_list:
3454 nimg = node_image[node]
3458 feedback_fn("* Skipping offline node %s" % (node,))
3462 if node == master_node:
3464 elif node_i.master_candidate:
3465 ntype = "master candidate"
3466 elif node_i.drained:
3472 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3474 msg = all_nvinfo[node].fail_msg
3475 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3478 nimg.rpc_fail = True
3481 nresult = all_nvinfo[node].payload
3483 nimg.call_ok = self._VerifyNode(node_i, nresult)
3484 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3485 self._VerifyNodeNetwork(node_i, nresult)
3486 self._VerifyNodeUserScripts(node_i, nresult)
3487 self._VerifyOob(node_i, nresult)
3488 self._VerifyFileStoragePaths(node_i, nresult,
3489 node == master_node)
3492 self._VerifyNodeLVM(node_i, nresult, vg_name)
3493 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3496 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3497 self._UpdateNodeInstances(node_i, nresult, nimg)
3498 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3499 self._UpdateNodeOS(node_i, nresult, nimg)
3501 if not nimg.os_fail:
3502 if refos_img is None:
3504 self._VerifyNodeOS(node_i, nimg, refos_img)
3505 self._VerifyNodeBridges(node_i, nresult, bridges)
3507 # Check whether all running instances are primary for the node. (This
3508 # can no longer be done from _VerifyInstance below, since some of the
3509 # wrong instances could be from other node groups.)
3510 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3512 for inst in non_primary_inst:
3513 test = inst in self.all_inst_info
3514 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3515 "instance should not run on node %s", node_i.name)
3516 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3517 "node is running unknown instance %s", inst)
3519 for node, result in extra_lv_nvinfo.items():
3520 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3521 node_image[node], vg_name)
3523 feedback_fn("* Verifying instance status")
3524 for instance in self.my_inst_names:
3526 feedback_fn("* Verifying instance %s" % instance)
3527 inst_config = self.my_inst_info[instance]
3528 self._VerifyInstance(instance, inst_config, node_image,
3530 inst_nodes_offline = []
3532 pnode = inst_config.primary_node
3533 pnode_img = node_image[pnode]
3534 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3535 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3536 " primary node failed", instance)
3538 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3540 constants.CV_EINSTANCEBADNODE, instance,
3541 "instance is marked as running and lives on offline node %s",
3542 inst_config.primary_node)
3544 # If the instance is non-redundant we cannot survive losing its primary
3545 # node, so we are not N+1 compliant.
3546 if inst_config.disk_template not in constants.DTS_MIRRORED:
3547 i_non_redundant.append(instance)
3549 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3550 constants.CV_EINSTANCELAYOUT,
3551 instance, "instance has multiple secondary nodes: %s",
3552 utils.CommaJoin(inst_config.secondary_nodes),
3553 code=self.ETYPE_WARNING)
3555 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3556 pnode = inst_config.primary_node
3557 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3558 instance_groups = {}
3560 for node in instance_nodes:
3561 instance_groups.setdefault(self.all_node_info[node].group,
3565 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3566 # Sort so that we always list the primary node first.
3567 for group, nodes in sorted(instance_groups.items(),
3568 key=lambda (_, nodes): pnode in nodes,
3571 self._ErrorIf(len(instance_groups) > 1,
3572 constants.CV_EINSTANCESPLITGROUPS,
3573 instance, "instance has primary and secondary nodes in"
3574 " different groups: %s", utils.CommaJoin(pretty_list),
3575 code=self.ETYPE_WARNING)
3577 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3578 i_non_a_balanced.append(instance)
3580 for snode in inst_config.secondary_nodes:
3581 s_img = node_image[snode]
3582 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3583 snode, "instance %s, connection to secondary node failed",
3587 inst_nodes_offline.append(snode)
3589 # warn that the instance lives on offline nodes
3590 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3591 "instance has offline secondary node(s) %s",
3592 utils.CommaJoin(inst_nodes_offline))
3593 # ... or ghost/non-vm_capable nodes
3594 for node in inst_config.all_nodes:
3595 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3596 instance, "instance lives on ghost node %s", node)
3597 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3598 instance, "instance lives on non-vm_capable node %s", node)
3600 feedback_fn("* Verifying orphan volumes")
3601 reserved = utils.FieldSet(*cluster.reserved_lvs)
3603 # We will get spurious "unknown volume" warnings if any node of this group
3604 # is secondary for an instance whose primary is in another group. To avoid
3605 # them, we find these instances and add their volumes to node_vol_should.
3606 for inst in self.all_inst_info.values():
3607 for secondary in inst.secondary_nodes:
3608 if (secondary in self.my_node_info
3609 and inst.name not in self.my_inst_info):
3610 inst.MapLVsByNode(node_vol_should)
3613 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3615 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3616 feedback_fn("* Verifying N+1 Memory redundancy")
3617 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3619 feedback_fn("* Other Notes")
3621 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3622 % len(i_non_redundant))
3624 if i_non_a_balanced:
3625 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3626 % len(i_non_a_balanced))
3629 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3632 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3635 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3639 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3640 """Analyze the post-hooks' result
3642 This method analyzes the hook result, handles it, and sends some
3643 nicely-formatted feedback back to the user.
3645 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3646 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3647 @param hooks_results: the results of the multi-node hooks rpc call
3648 @param feedback_fn: function used to send feedback back to the caller
3649 @param lu_result: previous Exec result
3650 @return: the new Exec result, based on the previous result
3654 # We only really run POST phase hooks, only for non-empty groups,
3655 # and are only interested in their results
3656 if not self.my_node_names:
3659 elif phase == constants.HOOKS_PHASE_POST:
3660 # Used to change hooks' output to proper indentation
3661 feedback_fn("* Hooks Results")
3662 assert hooks_results, "invalid result from hooks"
3664 for node_name in hooks_results:
3665 res = hooks_results[node_name]
3667 test = msg and not res.offline
3668 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3669 "Communication failure in hooks execution: %s", msg)
3670 if res.offline or msg:
3671 # No need to investigate payload if node is offline or gave
3674 for script, hkr, output in res.payload:
3675 test = hkr == constants.HKR_FAIL
3676 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3677 "Script %s failed, output:", script)
3679 output = self._HOOKS_INDENT_RE.sub(" ", output)
3680 feedback_fn("%s" % output)
3686 class LUClusterVerifyDisks(NoHooksLU):
3687 """Verifies the cluster disks status.
3692 def ExpandNames(self):
3693 self.share_locks = _ShareAll()
3694 self.needed_locks = {
3695 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3698 def Exec(self, feedback_fn):
3699 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3701 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3702 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3703 for group in group_names])
3706 class LUGroupVerifyDisks(NoHooksLU):
3707 """Verifies the status of all disks in a node group.
3712 def ExpandNames(self):
3713 # Raises errors.OpPrereqError on its own if group can't be found
3714 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3716 self.share_locks = _ShareAll()
3717 self.needed_locks = {
3718 locking.LEVEL_INSTANCE: [],
3719 locking.LEVEL_NODEGROUP: [],
3720 locking.LEVEL_NODE: [],
      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3723 # starts one instance of this opcode for every group, which means all
3724 # nodes will be locked for a short amount of time, so it's better to
3725 # acquire the node allocation lock as well.
3726 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3729 def DeclareLocks(self, level):
3730 if level == locking.LEVEL_INSTANCE:
3731 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3733 # Lock instances optimistically, needs verification once node and group
3734 # locks have been acquired
3735 self.needed_locks[locking.LEVEL_INSTANCE] = \
3736 self.cfg.GetNodeGroupInstances(self.group_uuid)
3738 elif level == locking.LEVEL_NODEGROUP:
3739 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3741 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3742 set([self.group_uuid] +
3743 # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
3747 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3748 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3750 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3753 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3754 self._LockInstancesNodes()
3756 # Lock all nodes in group to be verified
3757 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3758 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3759 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3761 def CheckPrereq(self):
3762 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3763 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3764 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3766 assert self.group_uuid in owned_groups
3768 # Check if locked instances are still correct
3769 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3771 # Get instance information
3772 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3774 # Check if node groups for locked instances are still correct
3775 _CheckInstancesNodeGroups(self.cfg, self.instances,
3776 owned_groups, owned_nodes, self.group_uuid)
3778 def Exec(self, feedback_fn):
3779 """Verify integrity of cluster disks.
3781 @rtype: tuple of three items
3782 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3791 nv_dict = _MapInstanceDisksToNodes(
3792 [inst for inst in self.instances.values()
3793 if inst.admin_state == constants.ADMINST_UP])
3796 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3797 set(self.cfg.GetVmCapableNodeList()))
3799 node_lvs = self.rpc.call_lv_list(nodes, [])
3801 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3811 for lv_name, (_, _, lv_online) in node_res.payload.items():
3812 inst = nv_dict.pop((node, lv_name), None)
3813 if not (lv_online or inst is None):
3814 res_instances.add(inst)
3816 # any leftover items in nv_dict are missing LVs, let's arrange the data
3818 for key, inst in nv_dict.iteritems():
3819 res_missing.setdefault(inst, []).append(list(key))
3821 return (res_nodes, list(res_instances), res_missing)
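
  # Illustrative sketch (not part of the original code) of the return value
  # above, with made-up node and instance names:
  #
  #   ({"node2.example.com": "Error enumerating LVs: ..."},   # res_nodes
  #    ["instance1.example.com"],                             # needs activate-disks
  #    {"instance2.example.com": [["node3.example.com", "xenvg/disk0"]]})
  #
  # i.e. per-node enumeration errors, instances with offline LVs, and instances
  # whose LVs are missing entirely from their nodes.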
3824 class LUClusterRepairDiskSizes(NoHooksLU):
3825 """Verifies the cluster disks sizes.
3830 def ExpandNames(self):
3831 if self.op.instances:
3832 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3833 # Not getting the node allocation lock as only a specific set of
3834 # instances (and their nodes) is going to be acquired
3835 self.needed_locks = {
3836 locking.LEVEL_NODE_RES: [],
3837 locking.LEVEL_INSTANCE: self.wanted_names,
3839 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3842 self.needed_locks = {
3843 locking.LEVEL_NODE_RES: locking.ALL_SET,
3844 locking.LEVEL_INSTANCE: locking.ALL_SET,
        # This opcode acquires the node locks for all instances
3847 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3850 self.share_locks = {
3851 locking.LEVEL_NODE_RES: 1,
3852 locking.LEVEL_INSTANCE: 0,
3853 locking.LEVEL_NODE_ALLOC: 1,
3856 def DeclareLocks(self, level):
3857 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3858 self._LockInstancesNodes(primary_only=True, level=level)
3860 def CheckPrereq(self):
3861 """Check prerequisites.
3863 This only checks the optional instance list against the existing names.
3866 if self.wanted_names is None:
3867 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3869 self.wanted_instances = \
3870 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3872 def _EnsureChildSizes(self, disk):
3873 """Ensure children of the disk have the needed disk size.
3875 This is valid mainly for DRBD8 and fixes an issue where the
3876 children have smaller disk size.
3878 @param disk: an L{ganeti.objects.Disk} object
3881 if disk.dev_type == constants.LD_DRBD8:
3882 assert disk.children, "Empty children for DRBD8?"
3883 fchild = disk.children[0]
3884 mismatch = fchild.size < disk.size
3886 self.LogInfo("Child disk has size %d, parent %d, fixing",
3887 fchild.size, disk.size)
3888 fchild.size = disk.size
3890 # and we recurse on this child only, not on the metadev
3891 return self._EnsureChildSizes(fchild) or mismatch
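
  # Illustrative sketch (added for clarity, not part of the original code):
  # for a DRBD8 disk whose data child was created smaller than the parent,
  # _EnsureChildSizes grows the child's recorded size to match, e.g.
  #
  #   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
  #                       children=[data_child, meta_child])
  #   # if data_child.size == 10200, it is bumped to 10240 and the method
  #   # returns True so the caller knows the configuration must be updated
  #
  # Only the data child (children[0]) is recursed into; the metadata device is
  # left untouched.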
3895 def Exec(self, feedback_fn):
3896 """Verify the size of cluster disks.
3899 # TODO: check child disks too
3900 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3903 pnode = instance.primary_node
3904 if pnode not in per_node_disks:
3905 per_node_disks[pnode] = []
3906 for idx, disk in enumerate(instance.disks):
3907 per_node_disks[pnode].append((instance, idx, disk))
3909 assert not (frozenset(per_node_disks.keys()) -
3910 self.owned_locks(locking.LEVEL_NODE_RES)), \
3911 "Not owning correct locks"
3912 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3916 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
3924 if len(result.payload) != len(dskl):
3925 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3926 " result.payload=%s", node, len(dskl), result.payload)
3927 self.LogWarning("Invalid result from node %s, ignoring node results",
3930 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3932 self.LogWarning("Disk %d of instance %s did not return size"
3933 " information, ignoring", idx, instance.name)
3935 if not isinstance(size, (int, long)):
3936 self.LogWarning("Disk %d of instance %s did not return valid"
3937 " size information, ignoring", idx, instance.name)
3940 if size != disk.size:
3941 self.LogInfo("Disk %d of instance %s has mismatched size,"
3942 " correcting: recorded %d, actual %d", idx,
3943 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3946 changed.append((instance.name, idx, size))
3947 if self._EnsureChildSizes(disk):
3948 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3953 class LUClusterRename(LogicalUnit):
3954 """Rename the cluster.
3957 HPATH = "cluster-rename"
3958 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
3969 def BuildHooksNodes(self):
3970 """Build hooks nodes.
3973 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3975 def CheckPrereq(self):
3976 """Verify that the passed name is a valid one.
3979 hostname = netutils.GetHostname(name=self.op.name,
3980 family=self.cfg.GetPrimaryIPFamily())
3982 new_name = hostname.name
3983 self.ip = new_ip = hostname.ip
3984 old_name = self.cfg.GetClusterName()
3985 old_ip = self.cfg.GetMasterIP()
3986 if new_name == old_name and new_ip == old_ip:
3987 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3988 " cluster has changed",
3990 if new_ip != old_ip:
3991 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3992 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3993 " reachable on the network" %
3994 new_ip, errors.ECODE_NOTUNIQUE)
3996 self.op.name = new_name
3998 def Exec(self, feedback_fn):
3999 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
4005 # shutdown the master IP
4006 master_params = self.cfg.GetMasterNetworkParameters()
4007 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4010 result.Raise("Could not disable the master role")
4013 cluster = self.cfg.GetClusterInfo()
4014 cluster.cluster_name = clustername
4015 cluster.master_ip = new_ip
4016 self.cfg.Update(cluster, feedback_fn)
4018 # update the known hosts file
4019 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4020 node_list = self.cfg.GetOnlineNodeList()
    try:
      node_list.remove(master_params.name)
    except ValueError:
      pass
4025 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4027 master_params.ip = new_ip
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    msg = result.fail_msg
    if msg:
4032 self.LogWarning("Could not re-enable the master role on"
4033 " the master, please restart manually: %s", msg)
4038 def _ValidateNetmask(cfg, netmask):
4039 """Checks if a netmask is valid.
4041 @type cfg: L{config.ConfigWriter}
4042 @param cfg: The cluster configuration
4044 @param netmask: the netmask to be verified
4045 @raise errors.OpPrereqError: if the validation fails
4048 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4051 except errors.ProgrammerError:
4052 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4053 ip_family, errors.ECODE_INVAL)
4054 if not ipcls.ValidateNetmask(netmask):
4055 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4056 (netmask), errors.ECODE_INVAL)
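
# Illustrative usage sketch (not part of the original code): the helper above
# is typically called from LUClusterSetParams.CheckArguments with the netmask
# given as a prefix length, e.g.
#
#   _ValidateNetmask(self.cfg, 24)   # fine for an IPv4 cluster
#   _ValidateNetmask(self.cfg, 64)   # raises OpPrereqError on IPv4, fine on IPv6
#
# The prefix length is checked against the cluster's primary IP family, so the
# same opcode parameter works for both IPv4 and IPv6 deployments.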
4059 class LUClusterSetParams(LogicalUnit):
4060 """Change the parameters of the cluster.
4063 HPATH = "cluster-modify"
4064 HTYPE = constants.HTYPE_CLUSTER
4067 def CheckArguments(self):
4071 if self.op.uid_pool:
4072 uidpool.CheckUidPool(self.op.uid_pool)
4074 if self.op.add_uids:
4075 uidpool.CheckUidPool(self.op.add_uids)
4077 if self.op.remove_uids:
4078 uidpool.CheckUidPool(self.op.remove_uids)
4080 if self.op.master_netmask is not None:
4081 _ValidateNetmask(self.cfg, self.op.master_netmask)
4083 if self.op.diskparams:
4084 for dt_params in self.op.diskparams.values():
4085 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
4092 def ExpandNames(self):
4093 # FIXME: in the future maybe other cluster params won't require checking on
4094 # all nodes to be modified.
4095 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4096 # resource locks the right thing, shouldn't it be the BGL instead?
4097 self.needed_locks = {
4098 locking.LEVEL_NODE: locking.ALL_SET,
4099 locking.LEVEL_INSTANCE: locking.ALL_SET,
4100 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4101 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4103 self.share_locks = _ShareAll()
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
4114 def BuildHooksNodes(self):
4115 """Build hooks nodes.
    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
4121 def CheckPrereq(self):
4122 """Check prerequisites.
4124 This checks whether the given params don't conflict and
4125 if the given volume group is valid.
4128 if self.op.vg_name is not None and not self.op.vg_name:
4129 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4130 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4131 " instances exist", errors.ECODE_INVAL)
4133 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4134 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4135 raise errors.OpPrereqError("Cannot disable drbd helper while"
4136 " drbd-based instances exist",
4139 node_list = self.owned_locks(locking.LEVEL_NODE)
4141 # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
4144 for node in node_list:
4145 msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
4158 if self.op.drbd_helper:
4159 # checks given drbd helper on all nodes
4160 helpers = self.rpc.call_drbd_helper(node_list)
4161 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4163 self.LogInfo("Not checking drbd helper on offline node %s", node)
4165 msg = helpers[node].fail_msg
4167 raise errors.OpPrereqError("Error checking drbd helper on node"
4168 " '%s': %s" % (node, msg),
4169 errors.ECODE_ENVIRON)
4170 node_helper = helpers[node].payload
4171 if node_helper != self.op.drbd_helper:
4172 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4173 (node, node_helper), errors.ECODE_ENVIRON)
4175 self.cluster = cluster = self.cfg.GetClusterInfo()
4176 # validate params changes
4177 if self.op.beparams:
4178 objects.UpgradeBeParams(self.op.beparams)
4179 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4180 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4182 if self.op.ndparams:
4183 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4184 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4186 # TODO: we need a more general way to handle resetting
4187 # cluster-level parameters to default values
4188 if self.new_ndparams["oob_program"] == "":
4189 self.new_ndparams["oob_program"] = \
4190 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4192 if self.op.hv_state:
4193 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4194 self.cluster.hv_state_static)
4195 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4196 for hv, values in new_hv_state.items())
4198 if self.op.disk_state:
4199 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4200 self.cluster.disk_state_static)
4201 self.new_disk_state = \
4202 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4203 for name, values in svalues.items()))
4204 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
4212 for group in self.cfg.GetAllNodeGroupsInfo().values():
4213 instances = frozenset([inst for inst in all_instances
4214 if compat.any(node in group.members
4215 for node in inst.all_nodes)])
4216 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4217 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4218 new = _ComputeNewInstanceViolations(ipol,
4219 new_ipolicy, instances)
4221 violations.update(new)
4224 self.LogWarning("After the ipolicy change the following instances"
4225 " violate them: %s",
4226 utils.CommaJoin(utils.NiceSort(violations)))
4228 if self.op.nicparams:
4229 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4230 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4231 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
4235 for instance in self.cfg.GetAllInstancesInfo().values():
4236 for nic_idx, nic in enumerate(instance.nics):
4237 params_copy = copy.deepcopy(nic.nicparams)
4238 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4240 # check parameter syntax
4242 objects.NIC.CheckParameterSyntax(params_filled)
4243 except errors.ConfigurationError, err:
4244 nic_errors.append("Instance %s, nic/%d: %s" %
4245 (instance.name, nic_idx, err))
4247 # if we're moving instances to routed, check that they have an ip
4248 target_mode = params_filled[constants.NIC_MODE]
4249 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4250 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4251 " address" % (instance.name, nic_idx))
4253 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4254 "\n".join(nic_errors), errors.ECODE_INVAL)
4256 # hypervisor list/parameters
4257 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4258 if self.op.hvparams:
4259 for hv_name, hv_dict in self.op.hvparams.items():
4260 if hv_name not in self.new_hvparams:
4261 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4265 # disk template parameters
4266 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4267 if self.op.diskparams:
4268 for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4274 # os hypervisor parameters
4275 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
4278 if os_name not in self.new_os_hvp:
4279 self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_dict is None:
              # Delete if it exists
              self.new_os_hvp[os_name].pop(hv_name, None)
            elif hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
4291 self.new_osp = objects.FillDict(cluster.osparams, {})
4292 if self.op.osparams:
4293 for os_name, osp in self.op.osparams.items():
4294 if os_name not in self.new_osp:
4295 self.new_osp[os_name] = {}
4297 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4300 if not self.new_osp[os_name]:
4301 # we removed all parameters
4302 del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
4305 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4306 os_name, self.new_osp[os_name])
4308 # changes to the hypervisor list
4309 if self.op.enabled_hypervisors is not None:
4310 self.hv_list = self.op.enabled_hypervisors
4311 for hv in self.hv_list:
4312 # if the hypervisor doesn't already exist in the cluster
4313 # hvparams, we initialize it to empty, and then (in both
4314 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4324 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4325 # either the enabled list has changed, or the parameters have, validate
4326 for hv_name, hv_params in self.new_hvparams.items():
4327 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4328 (self.op.enabled_hypervisors and
4329 hv_name in self.op.enabled_hypervisors)):
4330 # either this is a new hypervisor, or its parameters have changed
4331 hv_class = hypervisor.GetHypervisorClass(hv_name)
4332 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4333 hv_class.CheckParameterSyntax(hv_params)
4334 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
4338 # defaults have already been checked in the above code-block
4339 for os_name, os_hvp in self.new_os_hvp.items():
4340 for hv_name, hv_params in os_hvp.items():
4341 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4342 # we need to fill in the new os_hvp on top of the actual hv_p
4343 cluster_defaults = self.new_hvparams.get(hv_name, {})
4344 new_osp = objects.FillDict(cluster_defaults, hv_params)
4345 hv_class = hypervisor.GetHypervisorClass(hv_name)
4346 hv_class.CheckParameterSyntax(new_osp)
4347 _CheckHVParams(self, node_list, hv_name, new_osp)
4349 if self.op.default_iallocator:
4350 alloc_script = utils.FindFile(self.op.default_iallocator,
4351 constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
4354 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4355 " specified" % self.op.default_iallocator,
4358 def Exec(self, feedback_fn):
4359 """Change the parameters of the cluster.
4362 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
4371 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4380 if self.op.hvparams:
4381 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4384 if self.op.enabled_hypervisors is not None:
4385 self.cluster.hvparams = self.new_hvparams
4386 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4387 if self.op.beparams:
4388 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4389 if self.op.nicparams:
4390 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4393 if self.op.osparams:
4394 self.cluster.osparams = self.new_osp
4395 if self.op.ndparams:
4396 self.cluster.ndparams = self.new_ndparams
4397 if self.op.diskparams:
4398 self.cluster.diskparams = self.new_diskparams
4399 if self.op.hv_state:
4400 self.cluster.hv_state_static = self.new_hv_state
4401 if self.op.disk_state:
4402 self.cluster.disk_state_static = self.new_disk_state
4404 if self.op.candidate_pool_size is not None:
4405 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4406 # we need to update the pool size here, otherwise the save will fail
4407 _AdjustCandidatePool(self, [])
4409 if self.op.maintain_node_health is not None:
4410 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4411 feedback_fn("Note: CONFD was disabled at build time, node health"
4412 " maintenance is not useful (still enabling it)")
4413 self.cluster.maintain_node_health = self.op.maintain_node_health
4415 if self.op.prealloc_wipe_disks is not None:
4416 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4418 if self.op.add_uids is not None:
4419 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4421 if self.op.remove_uids is not None:
4422 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4424 if self.op.uid_pool is not None:
4425 self.cluster.uid_pool = self.op.uid_pool
4427 if self.op.default_iallocator is not None:
4428 self.cluster.default_iallocator = self.op.default_iallocator
4430 if self.op.reserved_lvs is not None:
4431 self.cluster.reserved_lvs = self.op.reserved_lvs
4433 if self.op.use_external_mip_script is not None:
4434 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
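
    # Illustrative sketch (not part of the original code): the "mods" argument
    # uses the generic (DDM_ADD/DDM_REMOVE, value) pair format, so e.g.
    #
    #   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image"),
    #                           (constants.DDM_REMOVE, "lenny-image")], "hidden")
    #
    # would hide "debian-image" and unhide "lenny-image"; the OS names here are
    # made up for the example.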
4453 if self.op.hidden_os:
4454 helper_os("hidden_os", self.op.hidden_os, "hidden")
4456 if self.op.blacklisted_os:
4457 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4459 if self.op.master_netdev:
4460 master_params = self.cfg.GetMasterNetworkParameters()
4461 ems = self.cfg.GetUseExternalMipScript()
4462 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4463 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
4466 result.Raise("Could not disable the master ip")
4467 feedback_fn("Changing master_netdev from %s to %s" %
4468 (master_params.netdev, self.op.master_netdev))
4469 self.cluster.master_netdev = self.op.master_netdev
4471 if self.op.master_netmask:
4472 master_params = self.cfg.GetMasterNetworkParameters()
4473 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4474 result = self.rpc.call_node_change_master_netmask(master_params.name,
4475 master_params.netmask,
                                                         self.op.master_netmask,
                                                         master_params.ip,
                                                         master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
4483 self.cluster.master_netmask = self.op.master_netmask
4485 self.cfg.Update(self.cluster, feedback_fn)
4487 if self.op.master_netdev:
4488 master_params = self.cfg.GetMasterNetworkParameters()
4489 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4490 self.op.master_netdev)
4491 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4500 def _UploadHelper(lu, nodes, fname):
4501 """Helper for uploading a file and showing warnings.
4504 if os.path.exists(fname):
4505 result = lu.rpc.call_upload_file(nodes, fname)
4506 for to_node, to_result in result.items():
4507 msg = to_result.fail_msg
4509 msg = ("Copy of file %s to node %s failed: %s" %
4510 (fname, to_node, msg))
4514 def _ComputeAncillaryFiles(cluster, redist):
4515 """Compute files external to Ganeti which need to be consistent.
4517 @type redist: boolean
4518 @param redist: Whether to include files which need to be redistributed
4521 # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
4524 pathutils.CONFD_HMAC_KEY,
4525 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4526 pathutils.SPICE_CERT_FILE,
4527 pathutils.SPICE_CACERT_FILE,
4528 pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
4533 files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
4536 files_all.update(ssconf.SimpleStore().GetFileList())
4538 if cluster.modify_etc_hosts:
4539 files_all.add(pathutils.ETC_HOSTS)
4541 if cluster.use_external_mip_script:
4542 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4544 # Files which are optional, these must:
4545 # - be present in one other category as well
4546 # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])
4551 # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4560 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4561 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4563 # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4576 # Filenames in each category must be unique
4577 all_files_set = files_all | files_mc | files_vm
4578 assert (len(all_files_set) ==
4579 sum(map(len, [files_all, files_mc, files_vm]))), \
4580 "Found file listed in more than one file list"
4582 # Optional files must be present in one other category
4583 assert all_files_set.issuperset(files_opt), \
4584 "Optional file not in a different required list"
4586 # This one file should never ever be re-distributed via RPC
4587 assert not (redist and
4588 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4590 return (files_all, files_opt, files_mc, files_vm)
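
# Illustrative sketch (not part of the original code) of how the four sets
# returned above relate for a redistribution run (redist=True):
#
#   files_all: shipped to every node, e.g. pathutils.SSH_KNOWN_HOSTS_FILE,
#              pathutils.CONFD_HMAC_KEY, pathutils.RAPI_CERT_FILE, ...
#   files_opt: subset of the other categories that may legitimately be absent,
#              e.g. pathutils.RAPI_USERS_FILE
#   files_mc:  master-candidate-only files (empty when redistributing, since
#              the configuration file is pushed by ConfigWriter instead)
#   files_vm:  per-hypervisor ancillary files for vm_capable nodes only
#
# The asserts above guarantee the categories are disjoint and that every
# optional file also belongs to one of them.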
4593 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4594 """Distribute additional files which are part of the cluster configuration.
4596 ConfigWriter takes care of distributing the config and ssconf files, but
4597 there are more files which should be distributed to all nodes. This function
4598 makes sure those are copied.
4600 @param lu: calling logical unit
4601 @param additional_nodes: list of nodes not in the config to distribute to
4602 @type additional_vm: boolean
4603 @param additional_vm: whether the additional nodes are vm-capable or not
4606 # Gather target nodes
4607 cluster = lu.cfg.GetClusterInfo()
4608 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4610 online_nodes = lu.cfg.GetOnlineNodeList()
4611 online_set = frozenset(online_nodes)
4612 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4614 if additional_nodes is not None:
4615 online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4619 # Never distribute to master node
4620 for nodelist in [online_nodes, vm_nodes]:
4621 if master_info.name in nodelist:
4622 nodelist.remove(master_info.name)
4625 (files_all, _, files_mc, files_vm) = \
4626 _ComputeAncillaryFiles(cluster, True)
4628 # Never re-distribute configuration file from here
4629 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4630 pathutils.CLUSTER_CONF_FILE in files_vm)
4631 assert not files_mc, "Master candidates not handled in this function"
  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
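
# Illustrative note (not part of the original code): a typical caller is
# LUClusterRedistConf.Exec below, which simply does
#
#   self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
#   _RedistributeAncillaryFiles(self)
#
# so ConfigWriter pushes the config/ssconf files while this helper copies the
# remaining ancillary files (known_hosts, HMAC keys, hypervisor files, ...) to
# all online nodes except the master itself.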
4644 class LUClusterRedistConf(NoHooksLU):
4645 """Force the redistribution of cluster configuration.
4647 This is a very simple LU.
4652 def ExpandNames(self):
4653 self.needed_locks = {
4654 locking.LEVEL_NODE: locking.ALL_SET,
4655 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4657 self.share_locks = _ShareAll()
4659 def Exec(self, feedback_fn):
4660 """Redistribute the configuration.
4663 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4664 _RedistributeAncillaryFiles(self)
4667 class LUClusterActivateMasterIp(NoHooksLU):
4668 """Activate the master IP on the master node.
4671 def Exec(self, feedback_fn):
4672 """Activate the master IP.
4675 master_params = self.cfg.GetMasterNetworkParameters()
4676 ems = self.cfg.GetUseExternalMipScript()
4677 result = self.rpc.call_node_activate_master_ip(master_params.name,
4679 result.Raise("Could not activate the master IP")
4682 class LUClusterDeactivateMasterIp(NoHooksLU):
4683 """Deactivate the master IP on the master node.
4686 def Exec(self, feedback_fn):
4687 """Deactivate the master IP.
4690 master_params = self.cfg.GetMasterNetworkParameters()
4691 ems = self.cfg.GetUseExternalMipScript()
4692 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4694 result.Raise("Could not deactivate the master IP")
4697 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4698 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or disks is not None and not disks:
    return True
4704 disks = _ExpandCheckDisks(instance, disks)
4707 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4709 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4714 # TODO: Convert to utils.Retry
4717 degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
4722 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4723 msg = rstats.fail_msg
4725 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4728 raise errors.RemoteError("Can't contact node %s for mirror data,"
4729 " aborting." % node)
4732 rstats = rstats.payload
4734 for i, mstat in enumerate(rstats):
4736 lu.LogWarning("Can't compute data for node %s/%s",
4737 node, disks[i].iv_name)
4740 cumul_degraded = (cumul_degraded or
4741 (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
4744 if mstat.estimated_time is not None:
4745 rem_time = ("%s remaining (estimated)" %
4746 utils.FormatSeconds(mstat.estimated_time))
4747 max_time = mstat.estimated_time
4749 rem_time = "no time estimate"
4750 lu.LogInfo("- device %s: %5.2f%% done, %s",
4751 disks[i].iv_name, mstat.sync_percent, rem_time)
4753 # if we're done but degraded, let's do a few small retries, to
4754 # make sure we see a stable and not transient situation; therefore
4755 # we force restart of the loop
4756 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4757 logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))
4768 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4770 return not cumul_degraded
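
# Illustrative usage sketch (not part of the original code): instance-creation
# and disk-replacement LUs elsewhere in this module typically call the helper
# along the lines of
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     # handle the failure, e.g. abort the operation or warn the user
#     ...
#
# i.e. a False return value means at least one mirror stayed degraded after
# the retries above.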
4773 def _BlockdevFind(lu, node, dev, instance):
4774 """Wrapper around call_blockdev_find to annotate diskparams.
4776 @param lu: A reference to the lu object
4777 @param node: The node to call out
4778 @param dev: The device to find
4779 @param instance: The instance object the device belongs to
4780 @returns The result of the rpc call
4783 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4784 return lu.rpc.call_blockdev_find(node, disk)
4787 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4788 """Wrapper around L{_CheckDiskConsistencyInner}.
4791 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)


def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
4798 """Check that mirrors are not degraded.
4800 @attention: The device has to be annotated already.
4802 The ldisk parameter, if True, will change the test from the
4803 is_degraded attribute (which represents overall non-ok status for
4804 the device(s)) to the ldisk (representing the local storage status).
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
4834 class LUOobCommand(NoHooksLU):
4835 """Logical unit for OOB handling.
4839 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4841 def ExpandNames(self):
4842 """Gather locks we need.
4845 if self.op.node_names:
4846 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4847 lock_names = self.op.node_names
4849 lock_names = locking.ALL_SET
4851 self.needed_locks = {
4852 locking.LEVEL_NODE: lock_names,
4855 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4857 if not self.op.node_names:
4858 # Acquire node allocation lock only if all nodes are affected
4859 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4861 def CheckPrereq(self):
4862 """Check prerequisites.
4865 - the node exists in the configuration
4868 Any errors are signaled by raising errors.OpPrereqError.
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()
4874 assert self.op.power_delay >= 0.0
4876 if self.op.node_names:
4877 if (self.op.command in self._SKIP_MASTER and
4878 self.master_node in self.op.node_names):
4879 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4880 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4882 if master_oob_handler:
4883 additional_text = ("run '%s %s %s' if you want to operate on the"
4884 " master regardless") % (master_oob_handler,
4888 additional_text = "it does not support out-of-band operations"
4890 raise errors.OpPrereqError(("Operating on the master node %s is not"
4891 " allowed for %s; %s") %
4892 (self.master_node, self.op.command,
4893 additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
4896 if self.op.command in self._SKIP_MASTER:
4897 self.op.node_names.remove(self.master_node)
4899 if self.op.command in self._SKIP_MASTER:
4900 assert self.master_node not in self.op.node_names
4902 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4904 raise errors.OpPrereqError("Node %s not found" % node_name,
4907 self.nodes.append(node)
4909 if (not self.op.ignore_status and
4910 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4911 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4912 " not marked offline") % node_name,
4915 def Exec(self, feedback_fn):
4916 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []
4922 for idx, node in enumerate(utils.NiceSort(self.nodes,
4923 key=lambda node: node.name)):
4924 node_entry = [(constants.RS_NORMAL, node.name)]
4925 ret.append(node_entry)
4927 oob_program = _SupportsOob(self.cfg, node)
      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue
4933 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4934 self.op.command, oob_program, node.name)
4935 result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4941 node.name, result.fail_msg)
4942 node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
4946 except errors.OpExecError, err:
4947 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4949 node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
4952 # For health we should log important events
4953 for item, status in result.payload:
4954 if status in [constants.OOB_STATUS_WARNING,
4955 constants.OOB_STATUS_CRITICAL]:
4956 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4957 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
4961 elif self.op.command == constants.OOB_POWER_OFF:
4962 node.powered = False
4963 elif self.op.command == constants.OOB_POWER_STATUS:
4964 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4965 if powered != node.powered:
4966 logging.warning(("Recorded power state (%s) of node '%s' does not"
4967 " match actual power state (%s)"), node.powered,
4970 # For configuration changing commands we should update the node
4971 if self.op.command in (constants.OOB_POWER_ON,
4972 constants.OOB_POWER_OFF):
4973 self.cfg.Update(node, feedback_fn)
4975 node_entry.append((constants.RS_NORMAL, result.payload))
4977 if (self.op.command == constants.OOB_POWER_ON and
4978 idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
4983 def _CheckPayload(self, result):
4984 """Checks if the payload is valid.
4986 @param result: RPC result
4987 @raises errors.OpExecError: If payload is not valid
    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
4992 if not isinstance(result.payload, list):
4993 errs.append("command 'health' is expected to return a list but got %s" %
4994 type(result.payload))
      else:
        for item, status in result.payload:
4997 if status not in constants.OOB_STATUSES:
4998 errs.append("health item '%s' has invalid status '%s'" %
5001 if self.op.command == constants.OOB_POWER_STATUS:
5002 if not isinstance(result.payload, dict):
5003 errs.append("power-status is expected to return a dict but got %s" %
5004 type(result.payload))
5006 if self.op.command in [
5007 constants.OOB_POWER_ON,
5008 constants.OOB_POWER_OFF,
5009 constants.OOB_POWER_CYCLE,
5011 if result.payload is not None:
5012 errs.append("%s is expected to not return payload but got '%s'" %
5013 (self.op.command, result.payload))
5016 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5017 utils.CommaJoin(errs))
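
  # Illustrative sketch (not part of the original code) of the payload shapes
  # the checks above accept, using made-up values:
  #
  #   OOB_HEALTH:       [("disk0", constants.OOB_STATUS_OK),
  #                      ("fan1", constants.OOB_STATUS_WARNING)]
  #   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
  #   OOB_POWER_ON/OFF/CYCLE: None (no payload expected)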
5020 class _OsQuery(_QueryBase):
5021 FIELDS = query.OS_FIELDS
5023 def ExpandNames(self, lu):
5024 # Lock all nodes in shared mode
5025 # Temporary removal of locks, should be reverted later
5026 # TODO: reintroduce locks when they are lighter-weight
5027 lu.needed_locks = {}
5028 #self.share_locks[locking.LEVEL_NODE] = 1
5029 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5031 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5037 self.do_locking = self.use_locking
5039 def DeclareLocks(self, lu, level):
5043 def _DiagnoseByOS(rlist):
5044 """Remaps a per-node return list into an a per-os per-node dictionary
5046 @param rlist: a map with node names as keys and OS objects as values
5049 @return: a dictionary with osnames as keys and as value another
5050 map, with nodes as keys and tuples of (path, status, diagnose,
5051 variants, parameters, api_versions) as values, eg::
5053 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5054 (/srv/..., False, "invalid api")],
5055 "node2": [(/srv/..., True, "", [], [])]}
    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5061 # level), so that nodes with a non-responding node daemon don't
5062 # make all OSes invalid
5063 good_nodes = [node_name for node_name in rlist
5064 if not rlist[node_name].fail_msg]
5065 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5068 for (name, path, status, diagnose, variants,
5069 params, api_versions) in nr.payload:
5070 if name not in all_os:
5071 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
5074 for nname in good_nodes:
5075 all_os[name][nname] = []
5076 # convert params from [name, help] to (name, help)
5077 params = [tuple(v) for v in params]
5078 all_os[name][node_name].append((path, status, diagnose,
                                                variants, params, api_versions))

    return all_os
5082 def _GetQueryData(self, lu):
5083 """Computes the list of nodes and their attributes.
5086 # Locking is not used
5087 assert not (compat.any(lu.glm.is_owned(level)
5088 for level in locking.LEVELS
5089 if level != locking.LEVEL_CLUSTER) or
5090 self.do_locking or self.use_locking)
5092 valid_nodes = [node.name
5093 for node in lu.cfg.GetAllNodesInfo().values()
5094 if not node.offline and node.vm_capable]
5095 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5096 cluster = lu.cfg.GetClusterInfo()
    data = {}

    for (os_name, os_data) in pol.items():
5101 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5102 hidden=(os_name in cluster.hidden_os),
5103 blacklisted=(os_name in cluster.blacklisted_os))
      variants = set()
      parameters = set()
      api_versions = set()
5109 for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
5122 variants.intersection_update(node_variants)
5123 parameters.intersection_update(node_params)
5124 api_versions.intersection_update(node_api)
5126 info.variants = list(variants)
5127 info.parameters = list(parameters)
5128 info.api_versions = list(api_versions)
5130 data[os_name] = info
5132 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5137 class LUOsDiagnose(NoHooksLU):
5138 """Logical unit for OS diagnose/query.
5144 def _BuildFilter(fields, names):
5145 """Builds a filter for querying OSes.
5148 name_filter = qlang.MakeSimpleFilter("name", names)
5150 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5151 # respective field is not requested
5152 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5153 for fname in ["hidden", "blacklisted"]
5154 if fname not in fields]
5155 if "valid" not in fields:
5156 status_filter.append([qlang.OP_TRUE, "valid"])
    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
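
  # Illustrative sketch (not part of the original code): asking for the fields
  # ["name", "valid"] with names ["debootstrap"] would produce roughly
  #
  #   [qlang.OP_AND,
  #    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debootstrap"]],
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
  #
  # i.e. hidden and blacklisted OSes stay filtered out unless those fields are
  # explicitly requested ("valid" is requested here, so no validity filter is
  # added).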
5170 def CheckArguments(self):
5171 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5172 self.op.output_fields, False)
5174 def ExpandNames(self):
5175 self.oq.ExpandNames(self)
5177 def Exec(self, feedback_fn):
5178 return self.oq.OldStyleQuery(self)
5181 class _ExtStorageQuery(_QueryBase):
5182 FIELDS = query.EXTSTORAGE_FIELDS
5184 def ExpandNames(self, lu):
5185 # Lock all nodes in shared mode
5186 # Temporary removal of locks, should be reverted later
5187 # TODO: reintroduce locks when they are lighter-weight
5188 lu.needed_locks = {}
5189 #self.share_locks[locking.LEVEL_NODE] = 1
5190 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5192 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5198 self.do_locking = self.use_locking
5200 def DeclareLocks(self, lu, level):
5204 def _DiagnoseByProvider(rlist):
5205 """Remaps a per-node return list into an a per-provider per-node dictionary
5207 @param rlist: a map with node names as keys and ExtStorage objects as values
5210 @return: a dictionary with extstorage providers as keys and as
5211 value another map, with nodes as keys and tuples of
5212 (path, status, diagnose, parameters) as values, eg::
5214 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5215 "node2": [(/srv/..., False, "missing file")]
5216 "node3": [(/srv/..., True, "", [])]
    """
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5222 # level), so that nodes with a non-responding node daemon don't
5223 # make all OSes invalid
5224 good_nodes = [node_name for node_name in rlist
5225 if not rlist[node_name].fail_msg]
5226 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5229 for (name, path, status, diagnose, params) in nr.payload:
5230 if name not in all_es:
5231 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_es[name] = {}
5234 for nname in good_nodes:
5235 all_es[name][nname] = []
5236 # convert params from [name, help] to (name, help)
5237 params = [tuple(v) for v in params]
        all_es[name][node_name].append((path, status, diagnose, params))

    return all_es
5241 def _GetQueryData(self, lu):
5242 """Computes the list of nodes and their attributes.
5245 # Locking is not used
5246 assert not (compat.any(lu.glm.is_owned(level)
5247 for level in locking.LEVELS
5248 if level != locking.LEVEL_CLUSTER) or
5249 self.do_locking or self.use_locking)
5251 valid_nodes = [node.name
5252 for node in lu.cfg.GetAllNodesInfo().values()
5253 if not node.offline and node.vm_capable]
5254 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
    data = {}

    nodegroup_list = lu.cfg.GetNodeGroupList()
5260 for (es_name, es_data) in pol.items():
5261 # For every provider compute the nodegroup validity.
5262 # To do this we need to check the validity of each node in es_data
5263 # and then construct the corresponding nodegroup dict:
5264 # { nodegroup1: status
5265 # nodegroup2: status
      # }
      ndgrp_data = {}
      for nodegroup in nodegroup_list:
5269 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5271 nodegroup_nodes = ndgrp.members
        nodegroup_name = ndgrp.name
        node_statuses = []
5275 for node in nodegroup_nodes:
5276 if node in valid_nodes:
5277 if es_data[node] != []:
5278 node_status = es_data[node][0][1]
5279 node_statuses.append(node_status)
5281 node_statuses.append(False)
5283 if False in node_statuses:
5284 ndgrp_data[nodegroup_name] = False
        else:
          ndgrp_data[nodegroup_name] = True
      # Compute the provider's parameters
      parameters = set()
      for idx, esl in enumerate(es_data.values()):
        valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)
5303 params = list(parameters)
5305 # Now fill all the info for this provider
5306 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5307 nodegroup_status=ndgrp_data,
                                  parameters=params)
      data[es_name] = info
5312 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5317 class LUExtStorageDiagnose(NoHooksLU):
5318 """Logical unit for ExtStorage diagnose/query.
5323 def CheckArguments(self):
5324 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5325 self.op.output_fields, False)
5327 def ExpandNames(self):
5328 self.eq.ExpandNames(self)
5330 def Exec(self, feedback_fn):
5331 return self.eq.OldStyleQuery(self)
5334 class LUNodeRemove(LogicalUnit):
5335 """Logical unit for removing a node.
5338 HPATH = "node-remove"
5339 HTYPE = constants.HTYPE_NODE
5341 def BuildHooksEnv(self):
5346 "OP_TARGET": self.op.node_name,
5347 "NODE_NAME": self.op.node_name,
5350 def BuildHooksNodes(self):
5351 """Build hooks nodes.
5353 This doesn't run on the target node in the pre phase as a failed
5354 node would then be impossible to remove.
5357 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
5362 return (all_nodes, all_nodes)
5364 def CheckPrereq(self):
5365 """Check prerequisites.
5368 - the node exists in the configuration
5369 - it does not have primary or secondary instances
5370 - it's not the master
5372 Any errors are signaled by raising errors.OpPrereqError.
5375 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5376 node = self.cfg.GetNodeInfo(self.op.node_name)
5377 assert node is not None
5379 masternode = self.cfg.GetMasterNode()
5380 if node.name == masternode:
5381 raise errors.OpPrereqError("Node is the master node, failover to another"
5382 " node is required", errors.ECODE_INVAL)
5384 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5385 if node.name in instance.all_nodes:
5386 raise errors.OpPrereqError("Instance %s is still running on the node,"
5387 " please remove first" % instance_name,
    self.op.node_name = node.name
    self.node = node
5392 def Exec(self, feedback_fn):
5393 """Removes the node from the cluster.
5397 logging.info("Stopping the node daemon and removing configs from node %s",
5400 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5402 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5405 # Promote nodes to master candidate as needed
5406 _AdjustCandidatePool(self, exceptions=[node.name])
5407 self.context.RemoveNode(node.name)
5409 # Run post hooks on the node before it's removed
5410 _RunPostHook(self, node.name)
5412 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5413 msg = result.fail_msg
5415 self.LogWarning("Errors encountered on the remote node while leaving"
5416 " the cluster: %s", msg)
5418 # Remove node from our /etc/hosts
5419 if self.cfg.GetClusterInfo().modify_etc_hosts:
5420 master_node = self.cfg.GetMasterNode()
5421 result = self.rpc.call_etc_hosts_modify(master_node,
5422 constants.ETC_HOSTS_REMOVE,
5424 result.Raise("Can't update hosts file with new host data")
5425 _RedistributeAncillaryFiles(self)
5428 class _NodeQuery(_QueryBase):
5429 FIELDS = query.NODE_FIELDS
5431 def ExpandNames(self, lu):
5432 lu.needed_locks = {}
5433 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5440 self.do_locking = (self.use_locking and
5441 query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5448 def DeclareLocks(self, lu, level):
5451 def _GetQueryData(self, lu):
5452 """Computes the list of nodes and their attributes.
5455 all_info = lu.cfg.GetAllNodesInfo()
5457 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5459 # Gather data as requested
5460 if query.NQ_LIVE in self.requested_data:
5461 # filter out non-vm_capable nodes
5462 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5464 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5465 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5466 [lu.cfg.GetHypervisorType()], es_flags)
5467 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5468 for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None
5473 if query.NQ_INST in self.requested_data:
5474 node_to_primary = dict([(name, set()) for name in nodenames])
5475 node_to_secondary = dict([(name, set()) for name in nodenames])
5477 inst_data = lu.cfg.GetAllInstancesInfo()
5479 for inst in inst_data.values():
5480 if inst.primary_node in node_to_primary:
5481 node_to_primary[inst.primary_node].add(inst.name)
5482 for secnode in inst.secondary_nodes:
5483 if secnode in node_to_secondary:
5484 node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
5487 node_to_secondary = None
5489 if query.NQ_OOB in self.requested_data:
5490 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5491 for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
5496 groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
5501 live_data, lu.cfg.GetMasterNode(),
5502 node_to_primary, node_to_secondary, groups,
5503 oob_support, lu.cfg.GetClusterInfo())
5506 class LUNodeQuery(NoHooksLU):
5507 """Logical unit for querying nodes.
5510 # pylint: disable=W0142
5513 def CheckArguments(self):
5514 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5515 self.op.output_fields, self.op.use_locking)
5517 def ExpandNames(self):
5518 self.nq.ExpandNames(self)
5520 def DeclareLocks(self, level):
5521 self.nq.DeclareLocks(self, level)
5523 def Exec(self, feedback_fn):
5524 return self.nq.OldStyleQuery(self)
5527 class LUNodeQueryvols(NoHooksLU):
5528 """Logical unit for getting volumes on node(s).
5532 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5533 _FIELDS_STATIC = utils.FieldSet("node")
5535 def CheckArguments(self):
5536 _CheckOutputFields(static=self._FIELDS_STATIC,
5537 dynamic=self._FIELDS_DYNAMIC,
5538 selected=self.op.output_fields)
5540 def ExpandNames(self):
5541 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
5553 def Exec(self, feedback_fn):
5554 """Computes the list of nodes and their attributes.
5557 nodenames = self.owned_locks(locking.LEVEL_NODE)
5558 volumes = self.rpc.call_node_volumes(nodenames)
5560 ilist = self.cfg.GetAllInstancesInfo()
5561 vol2inst = _MapInstanceDisksToNodes(ilist.values())
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue
5573 node_vols = sorted(nresult.payload,
5574 key=operator.itemgetter("dev"))
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
5600 class LUNodeQueryStorage(NoHooksLU):
5601 """Logical unit for getting information on storage units on node(s).
5604 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5607 def CheckArguments(self):
5608 _CheckOutputFields(static=self._FIELDS_STATIC,
5609 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5610 selected=self.op.output_fields)
5612 def ExpandNames(self):
5613 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
5625 def Exec(self, feedback_fn):
5626 """Computes the list of nodes and their attributes.
5629 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5631 # Always get name to sort by
5632 if constants.SF_NAME in self.op.output_fields:
5633 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
5637 # Never ask for node or type as it's only known to the LU
5638 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5639 while extra in fields:
5640 fields.remove(extra)
5642 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5643 name_idx = field_idx[constants.SF_NAME]
5645 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5646 data = self.rpc.call_storage_list(self.nodes,
5647 self.op.storage_type, st_args,
5648 self.op.name, fields)
5652 for node in utils.NiceSort(self.nodes):
5653 nresult = data[node]
5657 msg = nresult.fail_msg
5659 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5662 rows = dict([(row[name_idx], row) for row in nresult.payload])
5664 for name in utils.NiceSort(rows.keys()):
5669 for field in self.op.output_fields:
5670 if field == constants.SF_NODE:
5672 elif field == constants.SF_TYPE:
5673 val = self.op.storage_type
5674 elif field in field_idx:
5675 val = row[field_idx[field]]
5677 raise errors.ParameterError(field)
5686 class _InstanceQuery(_QueryBase):
5687 FIELDS = query.INSTANCE_FIELDS
5689 def ExpandNames(self, lu):
5690 lu.needed_locks = {}
5691 lu.share_locks = _ShareAll()
5694 self.wanted = _GetWantedInstances(lu, self.names)
5696 self.wanted = locking.ALL_SET
5698 self.do_locking = (self.use_locking and
5699 query.IQ_LIVE in self.requested_data)
5701 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5702 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5703 lu.needed_locks[locking.LEVEL_NODE] = []
5704 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5706 self.do_grouplocks = (self.do_locking and
5707 query.IQ_NODES in self.requested_data)
5709 def DeclareLocks(self, lu, level):
5711 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5712 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5714 # Lock all groups used by instances optimistically; this requires going
5715 # via the node before it's locked, requiring verification later on
5716 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5718 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5719 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5720 elif level == locking.LEVEL_NODE:
5721 lu._LockInstancesNodes() # pylint: disable=W0212
5724 def _CheckGroupLocks(lu):
5725 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5726 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5728 # Check if node groups for locked instances are still correct
5729 for instance_name in owned_instances:
5730 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5732 def _GetQueryData(self, lu):
5733 """Computes the list of instances and their attributes.
5736 if self.do_grouplocks:
5737 self._CheckGroupLocks(lu)
5739 cluster = lu.cfg.GetClusterInfo()
5740 all_info = lu.cfg.GetAllInstancesInfo()
5742 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5744 instance_list = [all_info[name] for name in instance_names]
5745 nodes = frozenset(itertools.chain(*(inst.all_nodes
5746 for inst in instance_list)))
5747 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5750 wrongnode_inst = set()
5752 # Gather data as requested
5753 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5755 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5757 result = node_data[name]
5759 # offline nodes will be in both lists
5760 assert result.fail_msg
5761 offline_nodes.append(name)
5763 bad_nodes.append(name)
5764 elif result.payload:
5765 for inst in result.payload:
5766 if inst in all_info:
5767 if all_info[inst].primary_node == name:
5768 live_data.update(result.payload)
5770 wrongnode_inst.add(inst)
5772 # orphan instance; we don't list it here as we don't
5773 # handle this case yet in the output of instance listing
5774 logging.warning("Orphan instance '%s' found on node %s",
5776 # else no instance is alive
5780 if query.IQ_DISKUSAGE in self.requested_data:
5781 gmi = ganeti.masterd.instance
5782 disk_usage = dict((inst.name,
5783 gmi.ComputeDiskSize(inst.disk_template,
5784 [{constants.IDISK_SIZE: disk.size}
5785 for disk in inst.disks]))
5786 for inst in instance_list)
5790 if query.IQ_CONSOLE in self.requested_data:
5792 for inst in instance_list:
5793 if inst.name in live_data:
5794 # Instance is running
5795 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5797 consinfo[inst.name] = None
5798 assert set(consinfo.keys()) == set(instance_names)
5802 if query.IQ_NODES in self.requested_data:
5803 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5805 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5806 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5807 for uuid in set(map(operator.attrgetter("group"),
5813 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5814 disk_usage, offline_nodes, bad_nodes,
5815 live_data, wrongnode_inst, consinfo,
5819 class LUQuery(NoHooksLU):
5820 """Query for resources/items of a certain kind.
5823 # pylint: disable=W0142
5826 def CheckArguments(self):
5827 qcls = _GetQueryImplementation(self.op.what)
5829 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5831 def ExpandNames(self):
5832 self.impl.ExpandNames(self)
5834 def DeclareLocks(self, level):
5835 self.impl.DeclareLocks(self, level)
5837 def Exec(self, feedback_fn):
5838 return self.impl.NewStyleQuery(self)
5841 class LUQueryFields(NoHooksLU):
5842 """Query for resources/items of a certain kind.
5845 # pylint: disable=W0142
5848 def CheckArguments(self):
5849 self.qcls = _GetQueryImplementation(self.op.what)
5851 def ExpandNames(self):
5852 self.needed_locks = {}
5854 def Exec(self, feedback_fn):
5855 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5858 class LUNodeModifyStorage(NoHooksLU):
5859 """Logical unit for modifying a storage volume on a node.
5864 def CheckArguments(self):
5865 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5867 storage_type = self.op.storage_type
5870 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5872 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5873 " modified" % storage_type,
5876 diff = set(self.op.changes.keys()) - modifiable
5878 raise errors.OpPrereqError("The following fields can not be modified for"
5879 " storage units of type '%s': %r" %
5880 (storage_type, list(diff)),
5883 def ExpandNames(self):
5884 self.needed_locks = {
5885 locking.LEVEL_NODE: self.op.node_name,
5888 def Exec(self, feedback_fn):
5889 """Computes the list of nodes and their attributes.
5892 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5893 result = self.rpc.call_storage_modify(self.op.node_name,
5894 self.op.storage_type, st_args,
5895 self.op.name, self.op.changes)
5896 result.Raise("Failed to modify storage unit '%s' on %s" %
5897 (self.op.name, self.op.node_name))
5900 class LUNodeAdd(LogicalUnit):
5901 """Logical unit for adding node to the cluster.
5905 HTYPE = constants.HTYPE_NODE
5906 _NFLAGS = ["master_capable", "vm_capable"]
5908 def CheckArguments(self):
5909 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5910 # validate/normalize the node name
5911 self.hostname = netutils.GetHostname(name=self.op.node_name,
5912 family=self.primary_ip_family)
5913 self.op.node_name = self.hostname.name
5915 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5916 raise errors.OpPrereqError("Cannot readd the master node",
5919 if self.op.readd and self.op.group:
5920 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5921 " being readded", errors.ECODE_INVAL)
5923 def BuildHooksEnv(self):
5926 This will run on all nodes before, and on all nodes + the new node after.
5930 "OP_TARGET": self.op.node_name,
5931 "NODE_NAME": self.op.node_name,
5932 "NODE_PIP": self.op.primary_ip,
5933 "NODE_SIP": self.op.secondary_ip,
5934 "MASTER_CAPABLE": str(self.op.master_capable),
5935 "VM_CAPABLE": str(self.op.vm_capable),
5938 def BuildHooksNodes(self):
5939 """Build hooks nodes.
5942 # Exclude added node
5943 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5944 post_nodes = pre_nodes + [self.op.node_name, ]
5946 return (pre_nodes, post_nodes)
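# For example (illustrative node names only): when adding "node4" to a
# cluster made of node1..node3, the pre-add hooks run on node1..node3 only,
# while the post-add hooks also run on the freshly added node4.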
5948 def CheckPrereq(self):
5949 """Check prerequisites.
5952 - the new node is not already in the config
5954 - its parameters (single/dual homed) matches the cluster
5956 Any errors are signaled by raising errors.OpPrereqError.
5960 hostname = self.hostname
5961 node = hostname.name
5962 primary_ip = self.op.primary_ip = hostname.ip
5963 if self.op.secondary_ip is None:
5964 if self.primary_ip_family == netutils.IP6Address.family:
5965 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5966 " IPv4 address must be given as secondary",
5968 self.op.secondary_ip = primary_ip
5970 secondary_ip = self.op.secondary_ip
5971 if not netutils.IP4Address.IsValid(secondary_ip):
5972 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5973 " address" % secondary_ip, errors.ECODE_INVAL)
5975 node_list = cfg.GetNodeList()
5976 if not self.op.readd and node in node_list:
5977 raise errors.OpPrereqError("Node %s is already in the configuration" %
5978 node, errors.ECODE_EXISTS)
5979 elif self.op.readd and node not in node_list:
5980 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5983 self.changed_primary_ip = False
5985 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5986 if self.op.readd and node == existing_node_name:
5987 if existing_node.secondary_ip != secondary_ip:
5988 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5989 " address configuration as before",
5991 if existing_node.primary_ip != primary_ip:
5992 self.changed_primary_ip = True
5996 if (existing_node.primary_ip == primary_ip or
5997 existing_node.secondary_ip == primary_ip or
5998 existing_node.primary_ip == secondary_ip or
5999 existing_node.secondary_ip == secondary_ip):
6000 raise errors.OpPrereqError("New node ip address(es) conflict with"
6001 " existing node %s" % existing_node.name,
6002 errors.ECODE_NOTUNIQUE)
6004 # After this 'if' block, None is no longer a valid value for the
6005 # _capable op attributes
6007 old_node = self.cfg.GetNodeInfo(node)
6008 assert old_node is not None, "Can't retrieve locked node %s" % node
6009 for attr in self._NFLAGS:
6010 if getattr(self.op, attr) is None:
6011 setattr(self.op, attr, getattr(old_node, attr))
6013 for attr in self._NFLAGS:
6014 if getattr(self.op, attr) is None:
6015 setattr(self.op, attr, True)
6017 if self.op.readd and not self.op.vm_capable:
6018 pri, sec = cfg.GetNodeInstances(node)
6020 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6021 " flag set to false, but it already holds"
6022 " instances" % node,
6025 # check that the type of the node (single versus dual homed) is the
6026 # same as for the master
6027 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6028 master_singlehomed = myself.secondary_ip == myself.primary_ip
6029 newbie_singlehomed = secondary_ip == primary_ip
6030 if master_singlehomed != newbie_singlehomed:
6031 if master_singlehomed:
6032 raise errors.OpPrereqError("The master has no secondary ip but the"
6033 " new node has one",
6036 raise errors.OpPrereqError("The master has a secondary ip but the"
6037 " new node doesn't have one",
6040 # checks reachability
6041 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6042 raise errors.OpPrereqError("Node not reachable by ping",
6043 errors.ECODE_ENVIRON)
6045 if not newbie_singlehomed:
6046 # check reachability from my secondary ip to newbie's secondary ip
6047 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6048 source=myself.secondary_ip):
6049 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6050 " based ping to node daemon port",
6051 errors.ECODE_ENVIRON)
6058 if self.op.master_capable:
6059 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6061 self.master_candidate = False
6064 self.new_node = old_node
6066 node_group = cfg.LookupNodeGroup(self.op.group)
6067 self.new_node = objects.Node(name=node,
6068 primary_ip=primary_ip,
6069 secondary_ip=secondary_ip,
6070 master_candidate=self.master_candidate,
6071 offline=False, drained=False,
6074 if self.op.ndparams:
6075 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6077 if self.op.hv_state:
6078 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6080 if self.op.disk_state:
6081 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6083 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6084 # it a property on the base class.
6085 result = rpc.DnsOnlyRunner().call_version([node])[node]
6086 result.Raise("Can't get version information from node %s" % node)
6087 if constants.PROTOCOL_VERSION == result.payload:
6088 logging.info("Communication to node %s fine, sw version %s match",
6089 node, result.payload)
6091 raise errors.OpPrereqError("Version mismatch master version %s,"
6092 " node version %s" %
6093 (constants.PROTOCOL_VERSION, result.payload),
6094 errors.ECODE_ENVIRON)
6096 def Exec(self, feedback_fn):
6097 """Adds the new node to the cluster.
6100 new_node = self.new_node
6101 node = new_node.name
6103 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6106 # We are adding a new node, so we assume it's powered
6107 new_node.powered = True
6109 # for re-adds, reset the offline/drained/master-candidate flags;
6110 # we need to reset here, otherwise offline would prevent RPC calls
6111 # later in the procedure; this also means that if the re-add
6112 # fails, we are left with a non-offlined, broken node
6114 new_node.drained = new_node.offline = False # pylint: disable=W0201
6115 self.LogInfo("Readding a node, the offline/drained flags were reset")
6116 # if we demote the node, we do cleanup later in the procedure
6117 new_node.master_candidate = self.master_candidate
6118 if self.changed_primary_ip:
6119 new_node.primary_ip = self.op.primary_ip
6121 # copy the master/vm_capable flags
6122 for attr in self._NFLAGS:
6123 setattr(new_node, attr, getattr(self.op, attr))
6125 # notify the user about any possible mc promotion
6126 if new_node.master_candidate:
6127 self.LogInfo("Node will be a master candidate")
6129 if self.op.ndparams:
6130 new_node.ndparams = self.op.ndparams
6132 new_node.ndparams = {}
6134 if self.op.hv_state:
6135 new_node.hv_state_static = self.new_hv_state
6137 if self.op.disk_state:
6138 new_node.disk_state_static = self.new_disk_state
6140 # Add node to our /etc/hosts, and add key to known_hosts
6141 if self.cfg.GetClusterInfo().modify_etc_hosts:
6142 master_node = self.cfg.GetMasterNode()
6143 result = self.rpc.call_etc_hosts_modify(master_node,
6144 constants.ETC_HOSTS_ADD,
6147 result.Raise("Can't update hosts file with new host data")
6149 if new_node.secondary_ip != new_node.primary_ip:
6150 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6153 node_verify_list = [self.cfg.GetMasterNode()]
6154 node_verify_param = {
6155 constants.NV_NODELIST: ([node], {}),
6156 # TODO: do a node-net-test as well?
6159 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6160 self.cfg.GetClusterName())
6161 for verifier in node_verify_list:
6162 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6163 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6165 for failed in nl_payload:
6166 feedback_fn("ssh/hostname verification failed"
6167 " (checking from %s): %s" %
6168 (verifier, nl_payload[failed]))
6169 raise errors.OpExecError("ssh/hostname verification failed")
6172 _RedistributeAncillaryFiles(self)
6173 self.context.ReaddNode(new_node)
6174 # make sure we redistribute the config
6175 self.cfg.Update(new_node, feedback_fn)
6176 # and make sure the new node will not have old files around
6177 if not new_node.master_candidate:
6178 result = self.rpc.call_node_demote_from_mc(new_node.name)
6179 msg = result.fail_msg
6181 self.LogWarning("Node failed to demote itself from master"
6182 " candidate status: %s" % msg)
6184 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6185 additional_vm=self.op.vm_capable)
6186 self.context.AddNode(new_node, self.proc.GetECId())
6189 class LUNodeSetParams(LogicalUnit):
6190 """Modifies the parameters of a node.
6192 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6193 to the node role (as _ROLE_*)
6194 @cvar _R2F: a dictionary from node role to tuples of flags
6195 @cvar _FLAGS: a list of attribute names corresponding to the flags
6198 HPATH = "node-modify"
6199 HTYPE = constants.HTYPE_NODE
6201 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6203 (True, False, False): _ROLE_CANDIDATE,
6204 (False, True, False): _ROLE_DRAINED,
6205 (False, False, True): _ROLE_OFFLINE,
6206 (False, False, False): _ROLE_REGULAR,
6208 _R2F = dict((v, k) for k, v in _F2R.items())
6209 _FLAGS = ["master_candidate", "drained", "offline"]
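# Illustrative example (comment only, not executed): with the mapping above,
# a node whose (master_candidate, drained, offline) flags are
# (True, False, False) is in _ROLE_CANDIDATE, and _R2F[_ROLE_REGULAR] gives
# back (False, False, False); the node role is fully determined by the three
# flags listed in _FLAGS.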
6211 def CheckArguments(self):
6212 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6213 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6214 self.op.master_capable, self.op.vm_capable,
6215 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6217 if all_mods.count(None) == len(all_mods):
6218 raise errors.OpPrereqError("Please pass at least one modification",
6220 if all_mods.count(True) > 1:
6221 raise errors.OpPrereqError("Can't set the node into more than one"
6222 " state at the same time",
6225 # Boolean value that tells us whether we might be demoting from MC
6226 self.might_demote = (self.op.master_candidate is False or
6227 self.op.offline is True or
6228 self.op.drained is True or
6229 self.op.master_capable is False)
6231 if self.op.secondary_ip:
6232 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6233 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6234 " address" % self.op.secondary_ip,
6237 self.lock_all = self.op.auto_promote and self.might_demote
6238 self.lock_instances = self.op.secondary_ip is not None
6240 def _InstanceFilter(self, instance):
6241 """Filter for getting affected instances.
6244 return (instance.disk_template in constants.DTS_INT_MIRROR and
6245 self.op.node_name in instance.all_nodes)
6247 def ExpandNames(self):
6249 self.needed_locks = {
6250 locking.LEVEL_NODE: locking.ALL_SET,
6252 # Block allocations when all nodes are locked
6253 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6256 self.needed_locks = {
6257 locking.LEVEL_NODE: self.op.node_name,
6260 # Since modifying a node can have severe effects on currently running
6261 # operations, the resource lock is acquired in at least shared mode
6262 self.needed_locks[locking.LEVEL_NODE_RES] = \
6263 self.needed_locks[locking.LEVEL_NODE]
6265 # Acquire all locks in shared mode except the node-related ones; the shared
6266 # locks are only needed for read-only access
6267 self.share_locks = _ShareAll()
6268 self.share_locks[locking.LEVEL_NODE] = 0
6269 self.share_locks[locking.LEVEL_NODE_RES] = 0
6270 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6272 if self.lock_instances:
6273 self.needed_locks[locking.LEVEL_INSTANCE] = \
6274 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6276 def BuildHooksEnv(self):
6279 This runs on the master node.
6283 "OP_TARGET": self.op.node_name,
6284 "MASTER_CANDIDATE": str(self.op.master_candidate),
6285 "OFFLINE": str(self.op.offline),
6286 "DRAINED": str(self.op.drained),
6287 "MASTER_CAPABLE": str(self.op.master_capable),
6288 "VM_CAPABLE": str(self.op.vm_capable),
6291 def BuildHooksNodes(self):
6292 """Build hooks nodes.
6295 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6298 def CheckPrereq(self):
6299 """Check prerequisites.
6301 This only checks the instance list against the existing names.
6304 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6306 if self.lock_instances:
6307 affected_instances = \
6308 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6310 # Verify instance locks
6311 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6312 wanted_instances = frozenset(affected_instances.keys())
6313 if wanted_instances - owned_instances:
6314 raise errors.OpPrereqError("Instances affected by changing node %s's"
6315 " secondary IP address have changed since"
6316 " locks were acquired, wanted '%s', have"
6317 " '%s'; retry the operation" %
6319 utils.CommaJoin(wanted_instances),
6320 utils.CommaJoin(owned_instances)),
6323 affected_instances = None
6325 if (self.op.master_candidate is not None or
6326 self.op.drained is not None or
6327 self.op.offline is not None):
6328 # we can't change the master's node flags
6329 if self.op.node_name == self.cfg.GetMasterNode():
6330 raise errors.OpPrereqError("The master role can be changed"
6331 " only via master-failover",
6334 if self.op.master_candidate and not node.master_capable:
6335 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6336 " it a master candidate" % node.name,
6339 if self.op.vm_capable is False:
6340 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6342 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6343 " the vm_capable flag" % node.name,
6346 if node.master_candidate and self.might_demote and not self.lock_all:
6347 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6348 # check if after removing the current node, we're missing master
6350 (mc_remaining, mc_should, _) = \
6351 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6352 if mc_remaining < mc_should:
6353 raise errors.OpPrereqError("Not enough master candidates, please"
6354 " pass auto promote option to allow"
6355 " promotion (--auto-promote or RAPI"
6356 " auto_promote=True)", errors.ECODE_STATE)
6358 self.old_flags = old_flags = (node.master_candidate,
6359 node.drained, node.offline)
6360 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6361 self.old_role = old_role = self._F2R[old_flags]
6363 # Check for ineffective changes
6364 for attr in self._FLAGS:
6365 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6366 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6367 setattr(self.op, attr, None)
6369 # Past this point, any flag change to False means a transition
6370 # away from the respective state, as only real changes are kept
6372 # TODO: We might query the real power state if it supports OOB
6373 if _SupportsOob(self.cfg, node):
6374 if self.op.offline is False and not (node.powered or
6375 self.op.powered is True):
6376 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6377 " offline status can be reset") %
6378 self.op.node_name, errors.ECODE_STATE)
6379 elif self.op.powered is not None:
6380 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6381 " as it does not support out-of-band"
6382 " handling") % self.op.node_name,
6385 # If the node is being de-offlined or un-drained, promote it to master candidate if needed
6386 if (self.op.drained is False or self.op.offline is False or
6387 (self.op.master_capable and not node.master_capable)):
6388 if _DecideSelfPromotion(self):
6389 self.op.master_candidate = True
6390 self.LogInfo("Auto-promoting node to master candidate")
6392 # If we're no longer master capable, we'll demote ourselves from MC
6393 if self.op.master_capable is False and node.master_candidate:
6394 self.LogInfo("Demoting from master candidate")
6395 self.op.master_candidate = False
6398 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6399 if self.op.master_candidate:
6400 new_role = self._ROLE_CANDIDATE
6401 elif self.op.drained:
6402 new_role = self._ROLE_DRAINED
6403 elif self.op.offline:
6404 new_role = self._ROLE_OFFLINE
6405 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6406 # False is still among the new flag values, i.e. we are un-setting one of the offline/drained/master_candidate flags
6408 new_role = self._ROLE_REGULAR
6409 else: # no new flags, nothing, keep old role
6412 self.new_role = new_role
6414 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6415 # Trying to transition out of offline status
6416 result = self.rpc.call_version([node.name])[node.name]
6418 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6419 " to report its version: %s" %
6420 (node.name, result.fail_msg),
6423 self.LogWarning("Transitioning node from offline to online state"
6424 " without using re-add. Please make sure the node"
6427 # When changing the secondary ip, verify if this is a single-homed to
6428 # multi-homed transition or vice versa, and apply the relevant
6430 if self.op.secondary_ip:
6431 # Ok even without locking, because this can't be changed by any LU
6432 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6433 master_singlehomed = master.secondary_ip == master.primary_ip
6434 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6435 if self.op.force and node.name == master.name:
6436 self.LogWarning("Transitioning from single-homed to multi-homed"
6437 " cluster; all nodes will require a secondary IP"
6440 raise errors.OpPrereqError("Changing the secondary ip on a"
6441 " single-homed cluster requires the"
6442 " --force option to be passed, and the"
6443 " target node to be the master",
6445 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6446 if self.op.force and node.name == master.name:
6447 self.LogWarning("Transitioning from multi-homed to single-homed"
6448 " cluster; secondary IP addresses will have to be"
6451 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6452 " same as the primary IP on a multi-homed"
6453 " cluster, unless the --force option is"
6454 " passed, and the target node is the"
6455 " master", errors.ECODE_INVAL)
6457 assert not (frozenset(affected_instances) -
6458 self.owned_locks(locking.LEVEL_INSTANCE))
6461 if affected_instances:
6462 msg = ("Cannot change secondary IP address: offline node has"
6463 " instances (%s) configured to use it" %
6464 utils.CommaJoin(affected_instances.keys()))
6465 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6467 # On online nodes, check that no instances are running, and that
6468 # the node has the new ip and we can reach it.
6469 for instance in affected_instances.values():
6470 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6471 msg="cannot change secondary ip")
6473 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6474 if master.name != node.name:
6475 # check reachability from master secondary ip to new secondary ip
6476 if not netutils.TcpPing(self.op.secondary_ip,
6477 constants.DEFAULT_NODED_PORT,
6478 source=master.secondary_ip):
6479 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6480 " based ping to node daemon port",
6481 errors.ECODE_ENVIRON)
6483 if self.op.ndparams:
6484 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6485 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6486 self.new_ndparams = new_ndparams
6488 if self.op.hv_state:
6489 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6490 self.node.hv_state_static)
6492 if self.op.disk_state:
6493 self.new_disk_state = \
6494 _MergeAndVerifyDiskState(self.op.disk_state,
6495 self.node.disk_state_static)
6497 def Exec(self, feedback_fn):
6502 old_role = self.old_role
6503 new_role = self.new_role
6507 if self.op.ndparams:
6508 node.ndparams = self.new_ndparams
6510 if self.op.powered is not None:
6511 node.powered = self.op.powered
6513 if self.op.hv_state:
6514 node.hv_state_static = self.new_hv_state
6516 if self.op.disk_state:
6517 node.disk_state_static = self.new_disk_state
6519 for attr in ["master_capable", "vm_capable"]:
6520 val = getattr(self.op, attr)
6522 setattr(node, attr, val)
6523 result.append((attr, str(val)))
6525 if new_role != old_role:
6526 # Tell the node to demote itself, if no longer MC and not offline
6527 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6528 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6530 self.LogWarning("Node failed to demote itself: %s", msg)
6532 new_flags = self._R2F[new_role]
6533 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6535 result.append((desc, str(nf)))
6536 (node.master_candidate, node.drained, node.offline) = new_flags
6538 # we locked all nodes, so adjust the candidate pool before updating this node
6540 _AdjustCandidatePool(self, [node.name])
6542 if self.op.secondary_ip:
6543 node.secondary_ip = self.op.secondary_ip
6544 result.append(("secondary_ip", self.op.secondary_ip))
6546 # this will trigger configuration file update, if needed
6547 self.cfg.Update(node, feedback_fn)
6549 # this will trigger job queue propagation or cleanup if the mc flag changed
6551 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6552 self.context.ReaddNode(node)
6557 class LUNodePowercycle(NoHooksLU):
6558 """Powercycles a node.
6563 def CheckArguments(self):
6564 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6565 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6566 raise errors.OpPrereqError("The node is the master and the force"
6567 " parameter was not set",
6570 def ExpandNames(self):
6571 """Locking for PowercycleNode.
6573 This is a last-resort option and shouldn't block on other
6574 jobs. Therefore, we grab no locks.
6577 self.needed_locks = {}
6579 def Exec(self, feedback_fn):
6583 result = self.rpc.call_node_powercycle(self.op.node_name,
6584 self.cfg.GetHypervisorType())
6585 result.Raise("Failed to schedule the reboot")
6586 return result.payload
6589 class LUClusterQuery(NoHooksLU):
6590 """Query cluster configuration.
6595 def ExpandNames(self):
6596 self.needed_locks = {}
6598 def Exec(self, feedback_fn):
6599 """Return cluster config.
6602 cluster = self.cfg.GetClusterInfo()
6605 # Filter just for enabled hypervisors
6606 for os_name, hv_dict in cluster.os_hvp.items():
6607 os_hvp[os_name] = {}
6608 for hv_name, hv_params in hv_dict.items():
6609 if hv_name in cluster.enabled_hypervisors:
6610 os_hvp[os_name][hv_name] = hv_params
6612 # Convert ip_family to ip_version
6613 primary_ip_version = constants.IP4_VERSION
6614 if cluster.primary_ip_family == netutils.IP6Address.family:
6615 primary_ip_version = constants.IP6_VERSION
6618 "software_version": constants.RELEASE_VERSION,
6619 "protocol_version": constants.PROTOCOL_VERSION,
6620 "config_version": constants.CONFIG_VERSION,
6621 "os_api_version": max(constants.OS_API_VERSIONS),
6622 "export_version": constants.EXPORT_VERSION,
6623 "architecture": runtime.GetArchInfo(),
6624 "name": cluster.cluster_name,
6625 "master": cluster.master_node,
6626 "default_hypervisor": cluster.primary_hypervisor,
6627 "enabled_hypervisors": cluster.enabled_hypervisors,
6628 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6629 for hypervisor_name in cluster.enabled_hypervisors]),
6631 "beparams": cluster.beparams,
6632 "osparams": cluster.osparams,
6633 "ipolicy": cluster.ipolicy,
6634 "nicparams": cluster.nicparams,
6635 "ndparams": cluster.ndparams,
6636 "diskparams": cluster.diskparams,
6637 "candidate_pool_size": cluster.candidate_pool_size,
6638 "master_netdev": cluster.master_netdev,
6639 "master_netmask": cluster.master_netmask,
6640 "use_external_mip_script": cluster.use_external_mip_script,
6641 "volume_group_name": cluster.volume_group_name,
6642 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6643 "file_storage_dir": cluster.file_storage_dir,
6644 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6645 "maintain_node_health": cluster.maintain_node_health,
6646 "ctime": cluster.ctime,
6647 "mtime": cluster.mtime,
6648 "uuid": cluster.uuid,
6649 "tags": list(cluster.GetTags()),
6650 "uid_pool": cluster.uid_pool,
6651 "default_iallocator": cluster.default_iallocator,
6652 "reserved_lvs": cluster.reserved_lvs,
6653 "primary_ip_version": primary_ip_version,
6654 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6655 "hidden_os": cluster.hidden_os,
6656 "blacklisted_os": cluster.blacklisted_os,
6662 class LUClusterConfigQuery(NoHooksLU):
6663 """Return configuration values.
6668 def CheckArguments(self):
6669 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6671 def ExpandNames(self):
6672 self.cq.ExpandNames(self)
6674 def DeclareLocks(self, level):
6675 self.cq.DeclareLocks(self, level)
6677 def Exec(self, feedback_fn):
6678 result = self.cq.OldStyleQuery(self)
6680 assert len(result) == 1
6685 class _ClusterQuery(_QueryBase):
6686 FIELDS = query.CLUSTER_FIELDS
6688 #: Do not sort (there is only one item)
6691 def ExpandNames(self, lu):
6692 lu.needed_locks = {}
6694 # The following variables interact with _QueryBase._GetNames
6695 self.wanted = locking.ALL_SET
6696 self.do_locking = self.use_locking
6699 raise errors.OpPrereqError("Can not use locking for cluster queries",
6702 def DeclareLocks(self, lu, level):
6705 def _GetQueryData(self, lu):
6706 """Computes the list of nodes and their attributes.
6709 # Locking is not used
6710 assert not (compat.any(lu.glm.is_owned(level)
6711 for level in locking.LEVELS
6712 if level != locking.LEVEL_CLUSTER) or
6713 self.do_locking or self.use_locking)
6715 if query.CQ_CONFIG in self.requested_data:
6716 cluster = lu.cfg.GetClusterInfo()
6718 cluster = NotImplemented
6720 if query.CQ_QUEUE_DRAINED in self.requested_data:
6721 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6723 drain_flag = NotImplemented
6725 if query.CQ_WATCHER_PAUSE in self.requested_data:
6726 master_name = lu.cfg.GetMasterNode()
6728 result = lu.rpc.call_get_watcher_pause(master_name)
6729 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6732 watcher_pause = result.payload
6734 watcher_pause = NotImplemented
6736 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6739 class LUInstanceActivateDisks(NoHooksLU):
6740 """Bring up an instance's disks.
6745 def ExpandNames(self):
6746 self._ExpandAndLockInstance()
6747 self.needed_locks[locking.LEVEL_NODE] = []
6748 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6750 def DeclareLocks(self, level):
6751 if level == locking.LEVEL_NODE:
6752 self._LockInstancesNodes()
6754 def CheckPrereq(self):
6755 """Check prerequisites.
6757 This checks that the instance is in the cluster.
6760 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6761 assert self.instance is not None, \
6762 "Cannot retrieve locked instance %s" % self.op.instance_name
6763 _CheckNodeOnline(self, self.instance.primary_node)
6765 def Exec(self, feedback_fn):
6766 """Activate the disks.
6769 disks_ok, disks_info = \
6770 _AssembleInstanceDisks(self, self.instance,
6771 ignore_size=self.op.ignore_size)
6773 raise errors.OpExecError("Cannot activate block devices")
6775 if self.op.wait_for_sync:
6776 if not _WaitForSync(self, self.instance):
6777 raise errors.OpExecError("Some disks of the instance are degraded!")
6782 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6784 """Prepare the block devices for an instance.
6786 This sets up the block devices on all nodes.
6788 @type lu: L{LogicalUnit}
6789 @param lu: the logical unit on whose behalf we execute
6790 @type instance: L{objects.Instance}
6791 @param instance: the instance for whose disks we assemble
6792 @type disks: list of L{objects.Disk} or None
6793 @param disks: which disks to assemble (or all, if None)
6794 @type ignore_secondaries: boolean
6795 @param ignore_secondaries: if true, errors on secondary nodes
6796 won't result in an error return from the function
6797 @type ignore_size: boolean
6798 @param ignore_size: if true, the current known size of the disk
6799 will not be used during the disk activation, useful for cases
6800 when the size is wrong
6801 @return: False if the operation failed, otherwise a list of
6802 (host, instance_visible_name, node_visible_name)
6803 with the mapping from node devices to instance devices
6808 iname = instance.name
6809 disks = _ExpandCheckDisks(instance, disks)
6811 # With the two passes mechanism we try to reduce the window of
6812 # opportunity for the race condition of switching DRBD to primary
6813 # before handshaking occurred, but we do not eliminate it
6815 # The proper fix would be to wait (with some limits) until the
6816 # connection has been made and drbd transitions from WFConnection
6817 # into any other network-connected state (Connected, SyncTarget,
6820 # 1st pass, assemble on all nodes in secondary mode
6821 for idx, inst_disk in enumerate(disks):
6822 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6824 node_disk = node_disk.Copy()
6825 node_disk.UnsetSize()
6826 lu.cfg.SetDiskID(node_disk, node)
6827 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6829 msg = result.fail_msg
6831 is_offline_secondary = (node in instance.secondary_nodes and
6833 lu.LogWarning("Could not prepare block device %s on node %s"
6834 " (is_primary=False, pass=1): %s",
6835 inst_disk.iv_name, node, msg)
6836 if not (ignore_secondaries or is_offline_secondary):
6839 # FIXME: race condition on drbd migration to primary
6841 # 2nd pass, do only the primary node
6842 for idx, inst_disk in enumerate(disks):
6845 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6846 if node != instance.primary_node:
6849 node_disk = node_disk.Copy()
6850 node_disk.UnsetSize()
6851 lu.cfg.SetDiskID(node_disk, node)
6852 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6854 msg = result.fail_msg
6856 lu.LogWarning("Could not prepare block device %s on node %s"
6857 " (is_primary=True, pass=2): %s",
6858 inst_disk.iv_name, node, msg)
6861 dev_path = result.payload
6863 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6865 # leave the disks configured for the primary node
6866 # this is a workaround that would be fixed better by
6867 # improving the logical/physical id handling
6869 lu.cfg.SetDiskID(disk, instance.primary_node)
6871 return disks_ok, device_info
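# Illustrative return value (hypothetical node/device names): on success this
# yields something like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. a success flag plus one (node, iv_name, device_path) tuple per disk
# assembled on the primary node.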
6874 def _StartInstanceDisks(lu, instance, force):
6875 """Start the disks of an instance.
6878 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6879 ignore_secondaries=force)
6881 _ShutdownInstanceDisks(lu, instance)
6882 if force is not None and not force:
6884 hint=("If the message above refers to a secondary node,"
6885 " you can retry the operation using '--force'"))
6886 raise errors.OpExecError("Disk consistency error")
6889 class LUInstanceDeactivateDisks(NoHooksLU):
6890 """Shutdown an instance's disks.
6895 def ExpandNames(self):
6896 self._ExpandAndLockInstance()
6897 self.needed_locks[locking.LEVEL_NODE] = []
6898 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6900 def DeclareLocks(self, level):
6901 if level == locking.LEVEL_NODE:
6902 self._LockInstancesNodes()
6904 def CheckPrereq(self):
6905 """Check prerequisites.
6907 This checks that the instance is in the cluster.
6910 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6911 assert self.instance is not None, \
6912 "Cannot retrieve locked instance %s" % self.op.instance_name
6914 def Exec(self, feedback_fn):
6915 """Deactivate the disks
6918 instance = self.instance
6920 _ShutdownInstanceDisks(self, instance)
6922 _SafeShutdownInstanceDisks(self, instance)
6925 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6926 """Shutdown block devices of an instance.
6928 This function checks if an instance is running, before calling
6929 _ShutdownInstanceDisks.
6932 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6933 _ShutdownInstanceDisks(lu, instance, disks=disks)
6936 def _ExpandCheckDisks(instance, disks):
6937 """Return the instance disks selected by the disks list
6939 @type disks: list of L{objects.Disk} or None
6940 @param disks: selected disks
6941 @rtype: list of L{objects.Disk}
6942 @return: selected instance disks to act on
6946 return instance.disks
6948 if not set(disks).issubset(instance.disks):
6949 raise errors.ProgrammerError("Can only act on disks belonging to the"
6954 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6955 """Shutdown block devices of an instance.
6957 This does the shutdown on all nodes of the instance.
6959 If ignore_primary is false, errors on the primary node are taken into account and the shutdown is reported as failed.
6964 disks = _ExpandCheckDisks(instance, disks)
6967 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6968 lu.cfg.SetDiskID(top_disk, node)
6969 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6970 msg = result.fail_msg
6972 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6973 disk.iv_name, node, msg)
6974 if ((node == instance.primary_node and not ignore_primary) or
6975 (node != instance.primary_node and not result.offline)):
6980 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6981 """Checks if a node has enough free memory.
6983 This function checks if a given node has the needed amount of free
6984 memory. In case the node has less memory or we cannot get the
6985 information from the node, this function raises an OpPrereqError
6988 @type lu: C{LogicalUnit}
6989 @param lu: a logical unit from which we get configuration data
6991 @param node: the node to check
6992 @type reason: C{str}
6993 @param reason: string to use in the error message
6994 @type requested: C{int}
6995 @param requested: the amount of memory in MiB to check for
6996 @type hypervisor_name: C{str}
6997 @param hypervisor_name: the hypervisor to ask for memory stats
6999 @return: node current free memory
7000 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7001 we cannot check the node
7004 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7005 nodeinfo[node].Raise("Can't get data from node %s" % node,
7006 prereq=True, ecode=errors.ECODE_ENVIRON)
7007 (_, _, (hv_info, )) = nodeinfo[node].payload
7009 free_mem = hv_info.get("memory_free", None)
7010 if not isinstance(free_mem, int):
7011 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7012 " was '%s'" % (node, free_mem),
7013 errors.ECODE_ENVIRON)
7014 if requested > free_mem:
7015 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7016 " needed %s MiB, available %s MiB" %
7017 (node, reason, requested, free_mem),
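# Minimal usage sketch (comment only; it mirrors the call made in
# LUInstanceStartup.CheckPrereq further below, with values coming from the
# instance's filled beparams):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)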
7022 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7023 """Checks if nodes have enough free disk space in all the VGs.
7025 This function checks if all given nodes have the needed amount of
7026 free disk. In case any node has less disk or we cannot get the
7027 information from the node, this function raises an OpPrereqError
7030 @type lu: C{LogicalUnit}
7031 @param lu: a logical unit from which we get configuration data
7032 @type nodenames: C{list}
7033 @param nodenames: the list of node names to check
7034 @type req_sizes: C{dict}
7035 @param req_sizes: the hash of vg and corresponding amount of disk in
7037 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7038 or we cannot check the node
7041 for vg, req_size in req_sizes.items():
7042 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
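# Illustrative example (hypothetical VG names/sizes): req_sizes maps each
# volume group to the required free space in MiB, e.g.
#   {"xenvg": 10240, "othervg": 2048}
# and every entry is verified separately via _CheckNodesFreeDiskOnVG above.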
7045 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7046 """Checks if nodes have enough free disk space in the specified VG.
7048 This function checks if all given nodes have the needed amount of
7049 free disk. In case any node has less disk or we cannot get the
7050 information from the node, this function raises an OpPrereqError
7053 @type lu: C{LogicalUnit}
7054 @param lu: a logical unit from which we get configuration data
7055 @type nodenames: C{list}
7056 @param nodenames: the list of node names to check
7058 @param vg: the volume group to check
7059 @type requested: C{int}
7060 @param requested: the amount of disk in MiB to check for
7061 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7062 or we cannot check the node
7065 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7066 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7067 for node in nodenames:
7068 info = nodeinfo[node]
7069 info.Raise("Cannot get current information from node %s" % node,
7070 prereq=True, ecode=errors.ECODE_ENVIRON)
7071 (_, (vg_info, ), _) = info.payload
7072 vg_free = vg_info.get("vg_free", None)
7073 if not isinstance(vg_free, int):
7074 raise errors.OpPrereqError("Can't compute free disk space on node"
7075 " %s for vg %s, result was '%s'" %
7076 (node, vg, vg_free), errors.ECODE_ENVIRON)
7077 if requested > vg_free:
7078 raise errors.OpPrereqError("Not enough disk space on target node %s"
7079 " vg %s: required %d MiB, available %d MiB" %
7080 (node, vg, requested, vg_free),
7084 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7085 """Checks if nodes have enough physical CPUs
7087 This function checks if all given nodes have the needed number of
7088 physical CPUs. In case any node has fewer CPUs or we cannot get the
7089 information from the node, this function raises an OpPrereqError
7092 @type lu: C{LogicalUnit}
7093 @param lu: a logical unit from which we get configuration data
7094 @type nodenames: C{list}
7095 @param nodenames: the list of node names to check
7096 @type requested: C{int}
7097 @param requested: the minimum acceptable number of physical CPUs
7098 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7099 or we cannot check the node
7102 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7103 for node in nodenames:
7104 info = nodeinfo[node]
7105 info.Raise("Cannot get current information from node %s" % node,
7106 prereq=True, ecode=errors.ECODE_ENVIRON)
7107 (_, _, (hv_info, )) = info.payload
7108 num_cpus = hv_info.get("cpu_total", None)
7109 if not isinstance(num_cpus, int):
7110 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7111 " on node %s, result was '%s'" %
7112 (node, num_cpus), errors.ECODE_ENVIRON)
7113 if requested > num_cpus:
7114 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7115 "required" % (node, num_cpus, requested),
7119 class LUInstanceStartup(LogicalUnit):
7120 """Starts an instance.
7123 HPATH = "instance-start"
7124 HTYPE = constants.HTYPE_INSTANCE
7127 def CheckArguments(self):
7129 if self.op.beparams:
7130 # fill the beparams dict
7131 objects.UpgradeBeParams(self.op.beparams)
7132 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7134 def ExpandNames(self):
7135 self._ExpandAndLockInstance()
7136 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7138 def DeclareLocks(self, level):
7139 if level == locking.LEVEL_NODE_RES:
7140 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7142 def BuildHooksEnv(self):
7145 This runs on master, primary and secondary nodes of the instance.
7149 "FORCE": self.op.force,
7152 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7156 def BuildHooksNodes(self):
7157 """Build hooks nodes.
7160 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7163 def CheckPrereq(self):
7164 """Check prerequisites.
7166 This checks that the instance is in the cluster.
7169 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7170 assert self.instance is not None, \
7171 "Cannot retrieve locked instance %s" % self.op.instance_name
7174 if self.op.hvparams:
7175 # check hypervisor parameter syntax (locally)
7176 cluster = self.cfg.GetClusterInfo()
7177 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7178 filled_hvp = cluster.FillHV(instance)
7179 filled_hvp.update(self.op.hvparams)
7180 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7181 hv_type.CheckParameterSyntax(filled_hvp)
7182 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7184 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7186 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7188 if self.primary_offline and self.op.ignore_offline_nodes:
7189 self.LogWarning("Ignoring offline primary node")
7191 if self.op.hvparams or self.op.beparams:
7192 self.LogWarning("Overridden parameters are ignored")
7194 _CheckNodeOnline(self, instance.primary_node)
7196 bep = self.cfg.GetClusterInfo().FillBE(instance)
7197 bep.update(self.op.beparams)
7199 # check bridges existence
7200 _CheckInstanceBridgesExist(self, instance)
7202 remote_info = self.rpc.call_instance_info(instance.primary_node,
7204 instance.hypervisor)
7205 remote_info.Raise("Error checking node %s" % instance.primary_node,
7206 prereq=True, ecode=errors.ECODE_ENVIRON)
7207 if not remote_info.payload: # not running already
7208 _CheckNodeFreeMemory(self, instance.primary_node,
7209 "starting instance %s" % instance.name,
7210 bep[constants.BE_MINMEM], instance.hypervisor)
7212 def Exec(self, feedback_fn):
7213 """Start the instance.
7216 instance = self.instance
7217 force = self.op.force
7219 if not self.op.no_remember:
7220 self.cfg.MarkInstanceUp(instance.name)
7222 if self.primary_offline:
7223 assert self.op.ignore_offline_nodes
7224 self.LogInfo("Primary node offline, marked instance as started")
7226 node_current = instance.primary_node
7228 _StartInstanceDisks(self, instance, force)
7231 self.rpc.call_instance_start(node_current,
7232 (instance, self.op.hvparams,
7234 self.op.startup_paused)
7235 msg = result.fail_msg
7237 _ShutdownInstanceDisks(self, instance)
7238 raise errors.OpExecError("Could not start instance: %s" % msg)
7241 class LUInstanceReboot(LogicalUnit):
7242 """Reboot an instance.
7245 HPATH = "instance-reboot"
7246 HTYPE = constants.HTYPE_INSTANCE
7249 def ExpandNames(self):
7250 self._ExpandAndLockInstance()
7252 def BuildHooksEnv(self):
7255 This runs on master, primary and secondary nodes of the instance.
7259 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7260 "REBOOT_TYPE": self.op.reboot_type,
7261 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7264 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7268 def BuildHooksNodes(self):
7269 """Build hooks nodes.
7272 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7275 def CheckPrereq(self):
7276 """Check prerequisites.
7278 This checks that the instance is in the cluster.
7281 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7282 assert self.instance is not None, \
7283 "Cannot retrieve locked instance %s" % self.op.instance_name
7284 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7285 _CheckNodeOnline(self, instance.primary_node)
7287 # check bridges existence
7288 _CheckInstanceBridgesExist(self, instance)
7290 def Exec(self, feedback_fn):
7291 """Reboot the instance.
7294 instance = self.instance
7295 ignore_secondaries = self.op.ignore_secondaries
7296 reboot_type = self.op.reboot_type
7298 remote_info = self.rpc.call_instance_info(instance.primary_node,
7300 instance.hypervisor)
7301 remote_info.Raise("Error checking node %s" % instance.primary_node)
7302 instance_running = bool(remote_info.payload)
7304 node_current = instance.primary_node
7306 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7307 constants.INSTANCE_REBOOT_HARD]:
7308 for disk in instance.disks:
7309 self.cfg.SetDiskID(disk, node_current)
7310 result = self.rpc.call_instance_reboot(node_current, instance,
7312 self.op.shutdown_timeout)
7313 result.Raise("Could not reboot instance")
7315 if instance_running:
7316 result = self.rpc.call_instance_shutdown(node_current, instance,
7317 self.op.shutdown_timeout)
7318 result.Raise("Could not shutdown instance for full reboot")
7319 _ShutdownInstanceDisks(self, instance)
7321 self.LogInfo("Instance %s was already stopped, starting now",
7323 _StartInstanceDisks(self, instance, ignore_secondaries)
7324 result = self.rpc.call_instance_start(node_current,
7325 (instance, None, None), False)
7326 msg = result.fail_msg
7328 _ShutdownInstanceDisks(self, instance)
7329 raise errors.OpExecError("Could not start instance for"
7330 " full reboot: %s" % msg)
7332 self.cfg.MarkInstanceUp(instance.name)
7335 class LUInstanceShutdown(LogicalUnit):
7336 """Shutdown an instance.
7339 HPATH = "instance-stop"
7340 HTYPE = constants.HTYPE_INSTANCE
7343 def ExpandNames(self):
7344 self._ExpandAndLockInstance()
7346 def BuildHooksEnv(self):
7349 This runs on master, primary and secondary nodes of the instance.
7352 env = _BuildInstanceHookEnvByObject(self, self.instance)
7353 env["TIMEOUT"] = self.op.timeout
7356 def BuildHooksNodes(self):
7357 """Build hooks nodes.
7360 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7363 def CheckPrereq(self):
7364 """Check prerequisites.
7366 This checks that the instance is in the cluster.
7369 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7370 assert self.instance is not None, \
7371 "Cannot retrieve locked instance %s" % self.op.instance_name
7373 if not self.op.force:
7374 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7376 self.LogWarning("Ignoring offline instance check")
7378 self.primary_offline = \
7379 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7381 if self.primary_offline and self.op.ignore_offline_nodes:
7382 self.LogWarning("Ignoring offline primary node")
7384 _CheckNodeOnline(self, self.instance.primary_node)
7386 def Exec(self, feedback_fn):
7387 """Shutdown the instance.
7390 instance = self.instance
7391 node_current = instance.primary_node
7392 timeout = self.op.timeout
7394 # If the instance is offline we shouldn't mark it as down, as that
7395 # resets the offline flag.
7396 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7397 self.cfg.MarkInstanceDown(instance.name)
7399 if self.primary_offline:
7400 assert self.op.ignore_offline_nodes
7401 self.LogInfo("Primary node offline, marked instance as stopped")
7403 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7404 msg = result.fail_msg
7406 self.LogWarning("Could not shutdown instance: %s", msg)
7408 _ShutdownInstanceDisks(self, instance)
7411 class LUInstanceReinstall(LogicalUnit):
7412 """Reinstall an instance.
7415 HPATH = "instance-reinstall"
7416 HTYPE = constants.HTYPE_INSTANCE
7419 def ExpandNames(self):
7420 self._ExpandAndLockInstance()
7422 def BuildHooksEnv(self):
7425 This runs on master, primary and secondary nodes of the instance.
7428 return _BuildInstanceHookEnvByObject(self, self.instance)
7430 def BuildHooksNodes(self):
7431 """Build hooks nodes.
7434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7437 def CheckPrereq(self):
7438 """Check prerequisites.
7440 This checks that the instance is in the cluster and is not running.
7443 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7444 assert instance is not None, \
7445 "Cannot retrieve locked instance %s" % self.op.instance_name
7446 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7447 " offline, cannot reinstall")
7449 if instance.disk_template == constants.DT_DISKLESS:
7450 raise errors.OpPrereqError("Instance '%s' has no disks" %
7451 self.op.instance_name,
7453 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7455 if self.op.os_type is not None:
7457 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7458 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7459 instance_os = self.op.os_type
7461 instance_os = instance.os
7463 nodelist = list(instance.all_nodes)
7465 if self.op.osparams:
7466 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7467 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7468 self.os_inst = i_osdict # the new dict (without defaults)
7472 self.instance = instance
7474 def Exec(self, feedback_fn):
7475 """Reinstall the instance.
7478 inst = self.instance
7480 if self.op.os_type is not None:
7481 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7482 inst.os = self.op.os_type
7483 # Write to configuration
7484 self.cfg.Update(inst, feedback_fn)
7486 _StartInstanceDisks(self, inst, None)
7488 feedback_fn("Running the instance OS create scripts...")
7489 # FIXME: pass debug option from opcode to backend
7490 result = self.rpc.call_instance_os_add(inst.primary_node,
7491 (inst, self.os_inst), True,
7492 self.op.debug_level)
7493 result.Raise("Could not install OS for instance %s on node %s" %
7494 (inst.name, inst.primary_node))
7496 _ShutdownInstanceDisks(self, inst)
7499 class LUInstanceRecreateDisks(LogicalUnit):
7500 """Recreate an instance's missing disks.
7503 HPATH = "instance-recreate-disks"
7504 HTYPE = constants.HTYPE_INSTANCE
7507 _MODIFYABLE = compat.UniqueFrozenset([
7508 constants.IDISK_SIZE,
7509 constants.IDISK_MODE,
7512 # New or changed disk parameters may have different semantics
7513 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7514 constants.IDISK_ADOPT,
7516 # TODO: Implement support changing VG while recreating
7518 constants.IDISK_METAVG,
7519 constants.IDISK_PROVIDER,
7522 def _RunAllocator(self):
7523 """Run the allocator based on input opcode.
7526 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7529 # The allocator should actually run in "relocate" mode, but current
7530 # allocators don't support relocating all the nodes of an instance at
7531 # the same time. As a workaround we use "allocate" mode, but this is
7532 # suboptimal for two reasons:
7533 # - The instance name passed to the allocator is present in the list of
7534 # existing instances, so there could be a conflict within the
7535 # internal structures of the allocator. This doesn't happen with the
7536 # current allocators, but it's a liability.
7537 # - The allocator counts the resources used by the instance twice: once
7538 # because the instance exists already, and once because it tries to
7539 # allocate a new instance.
7540 # The allocator could choose some of the nodes on which the instance is
7541 # running, but that's not a problem. If the instance nodes are broken,
7542 # they should already be marked as drained or offline, and hence
7543 # skipped by the allocator. If instance disks have been lost for other
7544 # reasons, then recreating the disks on the same nodes should be fine.
7545 disk_template = self.instance.disk_template
7546 spindle_use = be_full[constants.BE_SPINDLE_USE]
7547 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7548 disk_template=disk_template,
7549 tags=list(self.instance.GetTags()),
7550 os=self.instance.os,
7552 vcpus=be_full[constants.BE_VCPUS],
7553 memory=be_full[constants.BE_MAXMEM],
7554 spindle_use=spindle_use,
7555 disks=[{constants.IDISK_SIZE: d.size,
7556 constants.IDISK_MODE: d.mode}
7557 for d in self.instance.disks],
7558 hypervisor=self.instance.hypervisor,
7559 node_whitelist=None)
7560 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7562 ial.Run(self.op.iallocator)
7564 assert req.RequiredNodes() == len(self.instance.all_nodes)
7567 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7568 " %s" % (self.op.iallocator, ial.info),
7571 self.op.nodes = ial.result
7572 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7573 self.op.instance_name, self.op.iallocator,
7574 utils.CommaJoin(ial.result))
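# Editor's note (illustrative, node names hypothetical): for a DRBD8 instance
# req.RequiredNodes() is 2, so after a successful run self.op.nodes ends up as
# something like ["node3.example.com", "node7.example.com"].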
7576 def CheckArguments(self):
7577 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7578 # Normalize and convert deprecated list of disk indices
7579 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
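# Editor's note (illustrative): the deprecated form [2, 0, 2] is normalized
# here to [(0, {}), (2, {})] - sorted, de-duplicated indices, each paired with
# an (initially empty) dict of parameter overrides.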
7581 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7583 raise errors.OpPrereqError("Some disks have been specified more than"
7584 " once: %s" % utils.CommaJoin(duplicates),
7587 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7588 # when neither iallocator nor nodes are specified
7589 if self.op.iallocator or self.op.nodes:
7590 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7592 for (idx, params) in self.op.disks:
7593 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7594 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7596 raise errors.OpPrereqError("Parameters for disk %s try to change"
7597 " unmodifyable parameter(s): %s" %
7598 (idx, utils.CommaJoin(unsupported)),
7601 def ExpandNames(self):
7602 self._ExpandAndLockInstance()
7603 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7606 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7607 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7609 self.needed_locks[locking.LEVEL_NODE] = []
7610 if self.op.iallocator:
7611 # iallocator will select a new node in the same group
7612 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7613 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7615 self.needed_locks[locking.LEVEL_NODE_RES] = []
7617 def DeclareLocks(self, level):
7618 if level == locking.LEVEL_NODEGROUP:
7619 assert self.op.iallocator is not None
7620 assert not self.op.nodes
7621 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7622 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7623 # Lock the primary group used by the instance optimistically; this
7624 # requires going via the node before it's locked, requiring
7625 # verification later on
7626 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7627 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7629 elif level == locking.LEVEL_NODE:
7630 # If an allocator is used, then we lock all the nodes in the current
7631 # instance group, as we don't know yet which ones will be selected;
7632 # if we replace the nodes without using an allocator, locks are
7633 # already declared in ExpandNames; otherwise, we need to lock all the
7634 # instance nodes for disk re-creation
7635 if self.op.iallocator:
7636 assert not self.op.nodes
7637 assert not self.needed_locks[locking.LEVEL_NODE]
7638 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7640 # Lock member nodes of the group of the primary node
7641 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7642 self.needed_locks[locking.LEVEL_NODE].extend(
7643 self.cfg.GetNodeGroup(group_uuid).members)
7645 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7646 elif not self.op.nodes:
7647 self._LockInstancesNodes(primary_only=False)
7648 elif level == locking.LEVEL_NODE_RES:
7650 self.needed_locks[locking.LEVEL_NODE_RES] = \
7651 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
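# Editor's note (illustrative): the copy matters because LEVEL_NODE and
# LEVEL_NODE_RES would otherwise refer to the same list object, and any later
# change to the node lock list would silently widen the node-resource locks
# too; _CopyLockList presumably returns a shallow copy (passing special values
# such as locking.ALL_SET through unchanged).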
7653 def BuildHooksEnv(self):
7656 This runs on master, primary and secondary nodes of the instance.
7659 return _BuildInstanceHookEnvByObject(self, self.instance)
7661 def BuildHooksNodes(self):
7662 """Build hooks nodes.
7665 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7668 def CheckPrereq(self):
7669 """Check prerequisites.
7671 This checks that the instance is in the cluster and is not running.
7674 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7675 assert instance is not None, \
7676 "Cannot retrieve locked instance %s" % self.op.instance_name
7678 if len(self.op.nodes) != len(instance.all_nodes):
7679 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7680 " %d replacement nodes were specified" %
7681 (instance.name, len(instance.all_nodes),
7682 len(self.op.nodes)),
7684 assert instance.disk_template != constants.DT_DRBD8 or \
7685 len(self.op.nodes) == 2
7686 assert instance.disk_template != constants.DT_PLAIN or \
7687 len(self.op.nodes) == 1
7688 primary_node = self.op.nodes[0]
7690 primary_node = instance.primary_node
7691 if not self.op.iallocator:
7692 _CheckNodeOnline(self, primary_node)
7694 if instance.disk_template == constants.DT_DISKLESS:
7695 raise errors.OpPrereqError("Instance '%s' has no disks" %
7696 self.op.instance_name, errors.ECODE_INVAL)
7698 # Verify if node group locks are still correct
7699 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7701 # Node group locks are acquired only for the primary node (and only
7702 # when the allocator is used)
7703 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7706 # if we replace nodes *and* the old primary is offline, we don't
7707 # check the instance state
7708 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7709 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7710 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7711 msg="cannot recreate disks")
7714 self.disks = dict(self.op.disks)
7716 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7718 maxidx = max(self.disks.keys())
7719 if maxidx >= len(instance.disks):
7720 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7723 if ((self.op.nodes or self.op.iallocator) and
7724 sorted(self.disks.keys()) != range(len(instance.disks))):
7725 raise errors.OpPrereqError("Can't recreate disks partially and"
7726 " change the nodes at the same time",
7729 self.instance = instance
7731 if self.op.iallocator:
7732 self._RunAllocator()
7733 # Release unneeded node and node resource locks
7734 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7735 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7736 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7738 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7740 def Exec(self, feedback_fn):
7741 """Recreate the disks.
7744 instance = self.instance
7746 assert (self.owned_locks(locking.LEVEL_NODE) ==
7747 self.owned_locks(locking.LEVEL_NODE_RES))
7750 mods = [] # keeps track of needed changes
7752 for idx, disk in enumerate(instance.disks):
7754 changes = self.disks[idx]
7756 # Disk should not be recreated
7760 # update secondaries for disks, if needed
7761 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7762 # need to update the nodes and minors
7763 assert len(self.op.nodes) == 2
7764 assert len(disk.logical_id) == 6 # otherwise disk internals
7766 (_, _, old_port, _, _, old_secret) = disk.logical_id
7767 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7768 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7769 new_minors[0], new_minors[1], old_secret)
7770 assert len(disk.logical_id) == len(new_id)
7774 mods.append((idx, new_id, changes))
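# Editor's note (illustrative values only): for a DRBD8 disk the rebuilt
# logical_id keeps the old port and shared secret and swaps in the new node
# pair plus freshly allocated minors, e.g.
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, 4, 2, "secret")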
7776 # now that we have passed all asserts above, we can apply the mods
7777 # in a single run (to avoid partial changes)
7778 for idx, new_id, changes in mods:
7779 disk = instance.disks[idx]
7780 if new_id is not None:
7781 assert disk.dev_type == constants.LD_DRBD8
7782 disk.logical_id = new_id
7784 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7785 mode=changes.get(constants.IDISK_MODE, None))
7787 # change primary node, if needed
7789 instance.primary_node = self.op.nodes[0]
7790 self.LogWarning("Changing the instance's nodes, you will have to"
7791 " remove any disks left on the older nodes manually")
7794 self.cfg.Update(instance, feedback_fn)
7796 # All touched nodes must be locked
7797 mylocks = self.owned_locks(locking.LEVEL_NODE)
7798 assert mylocks.issuperset(frozenset(instance.all_nodes))
7799 _CreateDisks(self, instance, to_skip=to_skip)
7802 class LUInstanceRename(LogicalUnit):
7803 """Rename an instance.
7806 HPATH = "instance-rename"
7807 HTYPE = constants.HTYPE_INSTANCE
7809 def CheckArguments(self):
7813 if self.op.ip_check and not self.op.name_check:
7814 # TODO: make the ip check more flexible and not depend on the name check
7815 raise errors.OpPrereqError("IP address check requires a name check",
7818 def BuildHooksEnv(self):
7821 This runs on master, primary and secondary nodes of the instance.
7824 env = _BuildInstanceHookEnvByObject(self, self.instance)
7825 env["INSTANCE_NEW_NAME"] = self.op.new_name
7828 def BuildHooksNodes(self):
7829 """Build hooks nodes.
7832 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7835 def CheckPrereq(self):
7836 """Check prerequisites.
7838 This checks that the instance is in the cluster and is not running.
7841 self.op.instance_name = _ExpandInstanceName(self.cfg,
7842 self.op.instance_name)
7843 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7844 assert instance is not None
7845 _CheckNodeOnline(self, instance.primary_node)
7846 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7847 msg="cannot rename")
7848 self.instance = instance
7850 new_name = self.op.new_name
7851 if self.op.name_check:
7852 hostname = _CheckHostnameSane(self, new_name)
7853 new_name = self.op.new_name = hostname.name
7854 if (self.op.ip_check and
7855 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7856 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7857 (hostname.ip, new_name),
7858 errors.ECODE_NOTUNIQUE)
7860 instance_list = self.cfg.GetInstanceList()
7861 if new_name in instance_list and new_name != instance.name:
7862 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7863 new_name, errors.ECODE_EXISTS)
7865 def Exec(self, feedback_fn):
7866 """Rename the instance.
7869 inst = self.instance
7870 old_name = inst.name
7872 rename_file_storage = False
7873 if (inst.disk_template in constants.DTS_FILEBASED and
7874 self.op.new_name != inst.name):
7875 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7876 rename_file_storage = True
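# Editor's note (illustrative, path hypothetical): for file-based disks
# logical_id[1] is the disk's path, so with
# "/srv/ganeti/file-storage/inst1.example.com/disk0" the dirname above is the
# per-instance directory that gets renamed further down.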
7878 self.cfg.RenameInstance(inst.name, self.op.new_name)
7879 # Change the instance lock. This is definitely safe while we hold the BGL.
7880 # Otherwise the new lock would have to be added in acquired mode.
7882 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7883 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7884 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7886 # re-read the instance from the configuration after rename
7887 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7889 if rename_file_storage:
7890 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7891 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7892 old_file_storage_dir,
7893 new_file_storage_dir)
7894 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7895 " (but the instance has been renamed in Ganeti)" %
7896 (inst.primary_node, old_file_storage_dir,
7897 new_file_storage_dir))
7899 _StartInstanceDisks(self, inst, None)
7900 # update info on disks
7901 info = _GetInstanceInfoText(inst)
7902 for (idx, disk) in enumerate(inst.disks):
7903 for node in inst.all_nodes:
7904 self.cfg.SetDiskID(disk, node)
7905 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7907 self.LogWarning("Error setting info on node %s for disk %s: %s",
7908 node, idx, result.fail_msg)
7910 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7911 old_name, self.op.debug_level)
7912 msg = result.fail_msg
7914 msg = ("Could not run OS rename script for instance %s on node %s"
7915 " (but the instance has been renamed in Ganeti): %s" %
7916 (inst.name, inst.primary_node, msg))
7917 self.LogWarning(msg)
7919 _ShutdownInstanceDisks(self, inst)
7924 class LUInstanceRemove(LogicalUnit):
7925 """Remove an instance.
7928 HPATH = "instance-remove"
7929 HTYPE = constants.HTYPE_INSTANCE
7932 def ExpandNames(self):
7933 self._ExpandAndLockInstance()
7934 self.needed_locks[locking.LEVEL_NODE] = []
7935 self.needed_locks[locking.LEVEL_NODE_RES] = []
7936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7938 def DeclareLocks(self, level):
7939 if level == locking.LEVEL_NODE:
7940 self._LockInstancesNodes()
7941 elif level == locking.LEVEL_NODE_RES:
7943 self.needed_locks[locking.LEVEL_NODE_RES] = \
7944 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7946 def BuildHooksEnv(self):
7949 This runs on master, primary and secondary nodes of the instance.
7952 env = _BuildInstanceHookEnvByObject(self, self.instance)
7953 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7956 def BuildHooksNodes(self):
7957 """Build hooks nodes.
7960 nl = [self.cfg.GetMasterNode()]
7961 nl_post = list(self.instance.all_nodes) + nl
7962 return (nl, nl_post)
7964 def CheckPrereq(self):
7965 """Check prerequisites.
7967 This checks that the instance is in the cluster.
7970 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7971 assert self.instance is not None, \
7972 "Cannot retrieve locked instance %s" % self.op.instance_name
7974 def Exec(self, feedback_fn):
7975 """Remove the instance.
7978 instance = self.instance
7979 logging.info("Shutting down instance %s on node %s",
7980 instance.name, instance.primary_node)
7982 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7983 self.op.shutdown_timeout)
7984 msg = result.fail_msg
7986 if self.op.ignore_failures:
7987 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7989 raise errors.OpExecError("Could not shutdown instance %s on"
7991 (instance.name, instance.primary_node, msg))
7993 assert (self.owned_locks(locking.LEVEL_NODE) ==
7994 self.owned_locks(locking.LEVEL_NODE_RES))
7995 assert not (set(instance.all_nodes) -
7996 self.owned_locks(locking.LEVEL_NODE)), \
7997 "Not owning correct locks"
7999 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8002 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8003 """Utility function to remove an instance.
8006 logging.info("Removing block devices for instance %s", instance.name)
8008 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8009 if not ignore_failures:
8010 raise errors.OpExecError("Can't remove instance's disks")
8011 feedback_fn("Warning: can't remove instance's disks")
8013 logging.info("Removing instance %s out of cluster config", instance.name)
8015 lu.cfg.RemoveInstance(instance.name)
8017 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8018 "Instance lock removal conflict"
8020 # Remove lock for the instance
8021 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8024 class LUInstanceQuery(NoHooksLU):
8025 """Logical unit for querying instances.
8028 # pylint: disable=W0142
8031 def CheckArguments(self):
8032 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8033 self.op.output_fields, self.op.use_locking)
8035 def ExpandNames(self):
8036 self.iq.ExpandNames(self)
8038 def DeclareLocks(self, level):
8039 self.iq.DeclareLocks(self, level)
8041 def Exec(self, feedback_fn):
8042 return self.iq.OldStyleQuery(self)
8045 def _ExpandNamesForMigration(lu):
8046 """Expands names for use with L{TLMigrateInstance}.
8048 @type lu: L{LogicalUnit}
8051 if lu.op.target_node is not None:
8052 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8054 lu.needed_locks[locking.LEVEL_NODE] = []
8055 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8057 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8058 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8060 # The node allocation lock is actually only needed for replicated instances
8061 # (e.g. DRBD8) and if an iallocator is used.
8062 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8065 def _DeclareLocksForMigration(lu, level):
8066 """Declares locks for L{TLMigrateInstance}.
8068 @type lu: L{LogicalUnit}
8069 @param level: Lock level
8072 if level == locking.LEVEL_NODE_ALLOC:
8073 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8075 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8077 # Node locks are already declared here rather than at LEVEL_NODE as we need
8078 # the instance object anyway to declare the node allocation lock.
8079 if instance.disk_template in constants.DTS_EXT_MIRROR:
8080 if lu.op.target_node is None:
8081 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8082 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8084 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8086 del lu.recalculate_locks[locking.LEVEL_NODE]
8088 lu._LockInstancesNodes() # pylint: disable=W0212
8090 elif level == locking.LEVEL_NODE:
8091 # Node locks are declared together with the node allocation lock
8092 assert (lu.needed_locks[locking.LEVEL_NODE] or
8093 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8095 elif level == locking.LEVEL_NODE_RES:
8097 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8098 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8101 class LUInstanceFailover(LogicalUnit):
8102 """Failover an instance.
8105 HPATH = "instance-failover"
8106 HTYPE = constants.HTYPE_INSTANCE
8109 def CheckArguments(self):
8110 """Check the arguments.
8113 self.iallocator = getattr(self.op, "iallocator", None)
8114 self.target_node = getattr(self.op, "target_node", None)
8116 def ExpandNames(self):
8117 self._ExpandAndLockInstance()
8118 _ExpandNamesForMigration(self)
8121 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8122 self.op.ignore_consistency, True,
8123 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8125 self.tasklets = [self._migrater]
8127 def DeclareLocks(self, level):
8128 _DeclareLocksForMigration(self, level)
8130 def BuildHooksEnv(self):
8133 This runs on master, primary and secondary nodes of the instance.
8136 instance = self._migrater.instance
8137 source_node = instance.primary_node
8138 target_node = self.op.target_node
8140 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8141 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8142 "OLD_PRIMARY": source_node,
8143 "NEW_PRIMARY": target_node,
8146 if instance.disk_template in constants.DTS_INT_MIRROR:
8147 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8148 env["NEW_SECONDARY"] = source_node
8150 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8152 env.update(_BuildInstanceHookEnvByObject(self, instance))
8156 def BuildHooksNodes(self):
8157 """Build hooks nodes.
8160 instance = self._migrater.instance
8161 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8162 return (nl, nl + [instance.primary_node])
8165 class LUInstanceMigrate(LogicalUnit):
8166 """Migrate an instance.
8168 This is migration without shutting down, compared to the failover,
8169 which is done with shutdown.
8172 HPATH = "instance-migrate"
8173 HTYPE = constants.HTYPE_INSTANCE
8176 def ExpandNames(self):
8177 self._ExpandAndLockInstance()
8178 _ExpandNamesForMigration(self)
8181 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8182 False, self.op.allow_failover, False,
8183 self.op.allow_runtime_changes,
8184 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8185 self.op.ignore_ipolicy)
8187 self.tasklets = [self._migrater]
8189 def DeclareLocks(self, level):
8190 _DeclareLocksForMigration(self, level)
8192 def BuildHooksEnv(self):
8195 This runs on master, primary and secondary nodes of the instance.
8198 instance = self._migrater.instance
8199 source_node = instance.primary_node
8200 target_node = self.op.target_node
8201 env = _BuildInstanceHookEnvByObject(self, instance)
8203 "MIGRATE_LIVE": self._migrater.live,
8204 "MIGRATE_CLEANUP": self.op.cleanup,
8205 "OLD_PRIMARY": source_node,
8206 "NEW_PRIMARY": target_node,
8207 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8210 if instance.disk_template in constants.DTS_INT_MIRROR:
8211 env["OLD_SECONDARY"] = target_node
8212 env["NEW_SECONDARY"] = source_node
8214 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8218 def BuildHooksNodes(self):
8219 """Build hooks nodes.
8222 instance = self._migrater.instance
8223 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8224 return (nl, nl + [instance.primary_node])
8227 class LUInstanceMove(LogicalUnit):
8228 """Move an instance by data-copying.
8231 HPATH = "instance-move"
8232 HTYPE = constants.HTYPE_INSTANCE
8235 def ExpandNames(self):
8236 self._ExpandAndLockInstance()
8237 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8238 self.op.target_node = target_node
8239 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8240 self.needed_locks[locking.LEVEL_NODE_RES] = []
8241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8243 def DeclareLocks(self, level):
8244 if level == locking.LEVEL_NODE:
8245 self._LockInstancesNodes(primary_only=True)
8246 elif level == locking.LEVEL_NODE_RES:
8248 self.needed_locks[locking.LEVEL_NODE_RES] = \
8249 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8251 def BuildHooksEnv(self):
8254 This runs on master, primary and secondary nodes of the instance.
8258 "TARGET_NODE": self.op.target_node,
8259 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8261 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8264 def BuildHooksNodes(self):
8265 """Build hooks nodes.
8269 self.cfg.GetMasterNode(),
8270 self.instance.primary_node,
8271 self.op.target_node,
8275 def CheckPrereq(self):
8276 """Check prerequisites.
8278 This checks that the instance is in the cluster.
8281 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8282 assert self.instance is not None, \
8283 "Cannot retrieve locked instance %s" % self.op.instance_name
8285 node = self.cfg.GetNodeInfo(self.op.target_node)
8286 assert node is not None, \
8287 "Cannot retrieve locked node %s" % self.op.target_node
8289 self.target_node = target_node = node.name
8291 if target_node == instance.primary_node:
8292 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8293 (instance.name, target_node),
8296 bep = self.cfg.GetClusterInfo().FillBE(instance)
8298 for idx, dsk in enumerate(instance.disks):
8299 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8300 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8301 " cannot copy" % idx, errors.ECODE_STATE)
8303 _CheckNodeOnline(self, target_node)
8304 _CheckNodeNotDrained(self, target_node)
8305 _CheckNodeVmCapable(self, target_node)
8306 cluster = self.cfg.GetClusterInfo()
8307 group_info = self.cfg.GetNodeGroup(node.group)
8308 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8309 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8310 ignore=self.op.ignore_ipolicy)
8312 if instance.admin_state == constants.ADMINST_UP:
8313 # check memory requirements on the secondary node
8314 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8315 instance.name, bep[constants.BE_MAXMEM],
8316 instance.hypervisor)
8318 self.LogInfo("Not checking memory on the secondary node as"
8319 " instance will not be started")
8321 # check bridge existence
8322 _CheckInstanceBridgesExist(self, instance, node=target_node)
8324 def Exec(self, feedback_fn):
8325 """Move an instance.
8327 The move is done by shutting it down on its present node, copying
8328 the data over (slow) and starting it on the new node.
8331 instance = self.instance
8333 source_node = instance.primary_node
8334 target_node = self.target_node
8336 self.LogInfo("Shutting down instance %s on source node %s",
8337 instance.name, source_node)
8339 assert (self.owned_locks(locking.LEVEL_NODE) ==
8340 self.owned_locks(locking.LEVEL_NODE_RES))
8342 result = self.rpc.call_instance_shutdown(source_node, instance,
8343 self.op.shutdown_timeout)
8344 msg = result.fail_msg
8346 if self.op.ignore_consistency:
8347 self.LogWarning("Could not shutdown instance %s on node %s."
8348 " Proceeding anyway. Please make sure node"
8349 " %s is down. Error details: %s",
8350 instance.name, source_node, source_node, msg)
8352 raise errors.OpExecError("Could not shutdown instance %s on"
8354 (instance.name, source_node, msg))
8356 # create the target disks
8358 _CreateDisks(self, instance, target_node=target_node)
8359 except errors.OpExecError:
8360 self.LogWarning("Device creation failed, reverting...")
8362 _RemoveDisks(self, instance, target_node=target_node)
8364 self.cfg.ReleaseDRBDMinors(instance.name)
8367 cluster_name = self.cfg.GetClusterInfo().cluster_name
8370 # activate, get path, copy the data over
8371 for idx, disk in enumerate(instance.disks):
8372 self.LogInfo("Copying data for disk %d", idx)
8373 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8374 instance.name, True, idx)
8376 self.LogWarning("Can't assemble newly created disk %d: %s",
8377 idx, result.fail_msg)
8378 errs.append(result.fail_msg)
8380 dev_path = result.payload
8381 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8382 target_node, dev_path,
8385 self.LogWarning("Can't copy data over for disk %d: %s",
8386 idx, result.fail_msg)
8387 errs.append(result.fail_msg)
8391 self.LogWarning("Some disks failed to copy, aborting")
8393 _RemoveDisks(self, instance, target_node=target_node)
8395 self.cfg.ReleaseDRBDMinors(instance.name)
8396 raise errors.OpExecError("Errors during disk copy: %s" %
8399 instance.primary_node = target_node
8400 self.cfg.Update(instance, feedback_fn)
8402 self.LogInfo("Removing the disks on the original node")
8403 _RemoveDisks(self, instance, target_node=source_node)
8405 # Only start the instance if it's marked as up
8406 if instance.admin_state == constants.ADMINST_UP:
8407 self.LogInfo("Starting instance %s on node %s",
8408 instance.name, target_node)
8410 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8411 ignore_secondaries=True)
8413 _ShutdownInstanceDisks(self, instance)
8414 raise errors.OpExecError("Can't activate the instance's disks")
8416 result = self.rpc.call_instance_start(target_node,
8417 (instance, None, None), False)
8418 msg = result.fail_msg
8420 _ShutdownInstanceDisks(self, instance)
8421 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8422 (instance.name, target_node, msg))
8425 class LUNodeMigrate(LogicalUnit):
8426 """Migrate all instances from a node.
8429 HPATH = "node-migrate"
8430 HTYPE = constants.HTYPE_NODE
8433 def CheckArguments(self):
8436 def ExpandNames(self):
8437 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8439 self.share_locks = _ShareAll()
8440 self.needed_locks = {
8441 locking.LEVEL_NODE: [self.op.node_name],
8444 def BuildHooksEnv(self):
8447 This runs on the master, the primary and all the secondaries.
8451 "NODE_NAME": self.op.node_name,
8452 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8455 def BuildHooksNodes(self):
8456 """Build hooks nodes.
8459 nl = [self.cfg.GetMasterNode()]
8462 def CheckPrereq(self):
8465 def Exec(self, feedback_fn):
8466 # Prepare jobs for migrating instances
8467 allow_runtime_changes = self.op.allow_runtime_changes
8469 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8472 iallocator=self.op.iallocator,
8473 target_node=self.op.target_node,
8474 allow_runtime_changes=allow_runtime_changes,
8475 ignore_ipolicy=self.op.ignore_ipolicy)]
8476 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8478 # TODO: Run iallocator in this opcode and pass correct placement options to
8479 # OpInstanceMigrate. Since other jobs can modify the cluster between
8480 # running the iallocator and the actual migration, a good consistency model
8481 # will have to be found.
8483 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8484 frozenset([self.op.node_name]))
8486 return ResultWithJobs(jobs)
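# Editor's note (illustrative): `jobs` is one single-opcode job per primary
# instance on the node, e.g.
#   [[OpInstanceMigrate(instance_name="inst1", ...)],
#    [OpInstanceMigrate(instance_name="inst2", ...)]]
# so every instance is migrated as an independent job.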
8489 class TLMigrateInstance(Tasklet):
8490 """Tasklet class for instance migration.
8493 @ivar live: whether the migration will be done live or non-live;
8494 this variable is initialized only after CheckPrereq has run
8495 @type cleanup: boolean
8496 @ivar cleanup: Whether we are cleaning up after a failed migration
8497 @type iallocator: string
8498 @ivar iallocator: The iallocator used to determine target_node
8499 @type target_node: string
8500 @ivar target_node: If given, the target_node to reallocate the instance to
8501 @type failover: boolean
8502 @ivar failover: Whether operation results in failover or migration
8503 @type fallback: boolean
8504 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8506 @type ignore_consistency: boolean
8507 @ivar ignore_consistency: Whether we should ignore consistency between the source and target node
8509 @type shutdown_timeout: int
8510 @ivar shutdown_timeout: Timeout to use for the instance shutdown in case of failover
8511 @type ignore_ipolicy: bool
8512 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8517 _MIGRATION_POLL_INTERVAL = 1 # seconds
8518 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8520 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8521 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8523 """Initializes this class.
8526 Tasklet.__init__(self, lu)
8529 self.instance_name = instance_name
8530 self.cleanup = cleanup
8531 self.live = False # will be overridden later
8532 self.failover = failover
8533 self.fallback = fallback
8534 self.ignore_consistency = ignore_consistency
8535 self.shutdown_timeout = shutdown_timeout
8536 self.ignore_ipolicy = ignore_ipolicy
8537 self.allow_runtime_changes = allow_runtime_changes
8539 def CheckPrereq(self):
8540 """Check prerequisites.
8542 This checks that the instance is in the cluster.
8545 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8546 instance = self.cfg.GetInstanceInfo(instance_name)
8547 assert instance is not None
8548 self.instance = instance
8549 cluster = self.cfg.GetClusterInfo()
8551 if (not self.cleanup and
8552 not instance.admin_state == constants.ADMINST_UP and
8553 not self.failover and self.fallback):
8554 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8555 " switching to failover")
8556 self.failover = True
8558 if instance.disk_template not in constants.DTS_MIRRORED:
8563 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8564 " %s" % (instance.disk_template, text),
8567 if instance.disk_template in constants.DTS_EXT_MIRROR:
8568 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8570 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8572 if self.lu.op.iallocator:
8573 self._RunAllocator()
8575 # We set self.target_node as it is required by
8577 self.target_node = self.lu.op.target_node
8579 # Check that the target node is correct in terms of instance policy
8580 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8581 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8582 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8584 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8585 ignore=self.ignore_ipolicy)
8587 # self.target_node is already populated, either directly or by the iallocator run
8589 target_node = self.target_node
8590 if self.target_node == instance.primary_node:
8591 raise errors.OpPrereqError("Cannot migrate instance %s"
8592 " to its primary (%s)" %
8593 (instance.name, instance.primary_node),
8596 if len(self.lu.tasklets) == 1:
8597 # It is safe to release locks only when we're the only tasklet
8599 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8600 keep=[instance.primary_node, self.target_node])
8601 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8604 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8606 secondary_nodes = instance.secondary_nodes
8607 if not secondary_nodes:
8608 raise errors.ConfigurationError("No secondary node but using"
8609 " %s disk template" %
8610 instance.disk_template)
8611 target_node = secondary_nodes[0]
8612 if self.lu.op.iallocator or (self.lu.op.target_node and
8613 self.lu.op.target_node != target_node):
8615 text = "failed over"
8618 raise errors.OpPrereqError("Instances with disk template %s cannot"
8619 " be %s to arbitrary nodes"
8620 " (neither an iallocator nor a target"
8621 " node can be passed)" %
8622 (instance.disk_template, text),
8624 nodeinfo = self.cfg.GetNodeInfo(target_node)
8625 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8626 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8628 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8629 ignore=self.ignore_ipolicy)
8631 i_be = cluster.FillBE(instance)
8633 # check memory requirements on the secondary node
8634 if (not self.cleanup and
8635 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8636 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8637 "migrating instance %s" %
8639 i_be[constants.BE_MINMEM],
8640 instance.hypervisor)
8642 self.lu.LogInfo("Not checking memory on the secondary node as"
8643 " instance will not be started")
8645 # check if failover must be forced instead of migration
8646 if (not self.cleanup and not self.failover and
8647 i_be[constants.BE_ALWAYS_FAILOVER]):
8648 self.lu.LogInfo("Instance configured to always failover; fallback"
8650 self.failover = True
8652 # check bridge existence
8653 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8655 if not self.cleanup:
8656 _CheckNodeNotDrained(self.lu, target_node)
8657 if not self.failover:
8658 result = self.rpc.call_instance_migratable(instance.primary_node,
8660 if result.fail_msg and self.fallback:
8661 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8663 self.failover = True
8665 result.Raise("Can't migrate, please use failover",
8666 prereq=True, ecode=errors.ECODE_STATE)
8668 assert not (self.failover and self.cleanup)
8670 if not self.failover:
8671 if self.lu.op.live is not None and self.lu.op.mode is not None:
8672 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8673 " parameters are accepted",
8675 if self.lu.op.live is not None:
8677 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8679 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8680 # reset the 'live' parameter to None so that repeated
8681 # invocations of CheckPrereq do not raise an exception
8682 self.lu.op.live = None
8683 elif self.lu.op.mode is None:
8684 # read the default value from the hypervisor
8685 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8686 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8688 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8690 # Failover is never live
8693 if not (self.failover or self.cleanup):
8694 remote_info = self.rpc.call_instance_info(instance.primary_node,
8696 instance.hypervisor)
8697 remote_info.Raise("Error checking instance on node %s" %
8698 instance.primary_node)
8699 instance_running = bool(remote_info.payload)
8700 if instance_running:
8701 self.current_mem = int(remote_info.payload["memory"])
8703 def _RunAllocator(self):
8704 """Run the allocator based on input opcode.
8707 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8709 # FIXME: add a self.ignore_ipolicy option
8710 req = iallocator.IAReqRelocate(name=self.instance_name,
8711 relocate_from=[self.instance.primary_node])
8712 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8714 ial.Run(self.lu.op.iallocator)
8717 raise errors.OpPrereqError("Can't compute nodes using"
8718 " iallocator '%s': %s" %
8719 (self.lu.op.iallocator, ial.info),
8721 self.target_node = ial.result[0]
8722 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8723 self.instance_name, self.lu.op.iallocator,
8724 utils.CommaJoin(ial.result))
8726 def _WaitUntilSync(self):
8727 """Poll with custom rpc for disk sync.
8729 This uses our own step-based rpc call.
8732 self.feedback_fn("* wait until resync is done")
8736 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8738 (self.instance.disks,
8741 for node, nres in result.items():
8742 nres.Raise("Cannot resync disks on node %s" % node)
8743 node_done, node_percent = nres.payload
8744 all_done = all_done and node_done
8745 if node_percent is not None:
8746 min_percent = min(min_percent, node_percent)
8748 if min_percent < 100:
8749 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8752 def _EnsureSecondary(self, node):
8753 """Demote a node to secondary.
8756 self.feedback_fn("* switching node %s to secondary mode" % node)
8758 for dev in self.instance.disks:
8759 self.cfg.SetDiskID(dev, node)
8761 result = self.rpc.call_blockdev_close(node, self.instance.name,
8762 self.instance.disks)
8763 result.Raise("Cannot change disk to secondary on node %s" % node)
8765 def _GoStandalone(self):
8766 """Disconnect from the network.
8769 self.feedback_fn("* changing into standalone mode")
8770 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8771 self.instance.disks)
8772 for node, nres in result.items():
8773 nres.Raise("Cannot disconnect disks node %s" % node)
8775 def _GoReconnect(self, multimaster):
8776 """Reconnect to the network.
8782 msg = "single-master"
8783 self.feedback_fn("* changing disks into %s mode" % msg)
8784 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8785 (self.instance.disks, self.instance),
8786 self.instance.name, multimaster)
8787 for node, nres in result.items():
8788 nres.Raise("Cannot change disks config on node %s" % node)
8790 def _ExecCleanup(self):
8791 """Try to cleanup after a failed migration.
8793 The cleanup is done by:
8794 - check that the instance is running only on one node
8795 (and update the config if needed)
8796 - change disks on its secondary node to secondary
8797 - wait until disks are fully synchronized
8798 - disconnect from the network
8799 - change disks into single-master mode
8800 - wait again until disks are fully synchronized
8803 instance = self.instance
8804 target_node = self.target_node
8805 source_node = self.source_node
8807 # check running on only one node
8808 self.feedback_fn("* checking where the instance actually runs"
8809 " (if this hangs, the hypervisor might be in"
8811 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8812 for node, result in ins_l.items():
8813 result.Raise("Can't contact node %s" % node)
8815 runningon_source = instance.name in ins_l[source_node].payload
8816 runningon_target = instance.name in ins_l[target_node].payload
8818 if runningon_source and runningon_target:
8819 raise errors.OpExecError("Instance seems to be running on two nodes,"
8820 " or the hypervisor is confused; you will have"
8821 " to ensure manually that it runs only on one"
8822 " and restart this operation")
8824 if not (runningon_source or runningon_target):
8825 raise errors.OpExecError("Instance does not seem to be running at all;"
8826 " in this case it's safer to repair by"
8827 " running 'gnt-instance stop' to ensure disk"
8828 " shutdown, and then restarting it")
8830 if runningon_target:
8831 # the migration has actually succeeded, we need to update the config
8832 self.feedback_fn("* instance running on secondary node (%s),"
8833 " updating config" % target_node)
8834 instance.primary_node = target_node
8835 self.cfg.Update(instance, self.feedback_fn)
8836 demoted_node = source_node
8838 self.feedback_fn("* instance confirmed to be running on its"
8839 " primary node (%s)" % source_node)
8840 demoted_node = target_node
8842 if instance.disk_template in constants.DTS_INT_MIRROR:
8843 self._EnsureSecondary(demoted_node)
8845 self._WaitUntilSync()
8846 except errors.OpExecError:
8847 # we ignore errors here, since if the device is standalone, it
8848 # won't be able to sync
8850 self._GoStandalone()
8851 self._GoReconnect(False)
8852 self._WaitUntilSync()
8854 self.feedback_fn("* done")
8856 def _RevertDiskStatus(self):
8857 """Try to revert the disk status after a failed migration.
8860 target_node = self.target_node
8861 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8865 self._EnsureSecondary(target_node)
8866 self._GoStandalone()
8867 self._GoReconnect(False)
8868 self._WaitUntilSync()
8869 except errors.OpExecError, err:
8870 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8871 " please try to recover the instance manually;"
8872 " error '%s'" % str(err))
8874 def _AbortMigration(self):
8875 """Call the hypervisor code to abort a started migration.
8878 instance = self.instance
8879 target_node = self.target_node
8880 source_node = self.source_node
8881 migration_info = self.migration_info
8883 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8887 abort_msg = abort_result.fail_msg
8889 logging.error("Aborting migration failed on target node %s: %s",
8890 target_node, abort_msg)
8891 # Don't raise an exception here, as we still have to try to revert the
8892 # disk status, even if this step failed.
8894 abort_result = self.rpc.call_instance_finalize_migration_src(
8895 source_node, instance, False, self.live)
8896 abort_msg = abort_result.fail_msg
8898 logging.error("Aborting migration failed on source node %s: %s",
8899 source_node, abort_msg)
8901 def _ExecMigration(self):
8902 """Migrate an instance.
8904 The migrate is done by:
8905 - change the disks into dual-master mode
8906 - wait until disks are fully synchronized again
8907 - migrate the instance
8908 - change disks on the new secondary node (the old primary) to secondary
8909 - wait until disks are fully synchronized
8910 - change disks into single-master mode
8913 instance = self.instance
8914 target_node = self.target_node
8915 source_node = self.source_node
8917 # Check for hypervisor version mismatch and warn the user.
8918 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8919 None, [self.instance.hypervisor], False)
8920 for ninfo in nodeinfo.values():
8921 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8923 (_, _, (src_info, )) = nodeinfo[source_node].payload
8924 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8926 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8927 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8928 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8929 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8930 if src_version != dst_version:
8931 self.feedback_fn("* warning: hypervisor version mismatch between"
8932 " source (%s) and target (%s) node" %
8933 (src_version, dst_version))
8935 self.feedback_fn("* checking disk consistency between source and target")
8936 for (idx, dev) in enumerate(instance.disks):
8937 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8938 raise errors.OpExecError("Disk %s is degraded or not fully"
8939 " synchronized on target node,"
8940 " aborting migration" % idx)
8942 if self.current_mem > self.tgt_free_mem:
8943 if not self.allow_runtime_changes:
8944 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8945 " free memory to fit instance %s on target"
8946 " node %s (have %dMB, need %dMB)" %
8947 (instance.name, target_node,
8948 self.tgt_free_mem, self.current_mem))
8949 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8950 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8953 rpcres.Raise("Cannot modify instance runtime memory")
8955 # First get the migration information from the remote node
8956 result = self.rpc.call_migration_info(source_node, instance)
8957 msg = result.fail_msg
8959 log_err = ("Failed fetching source migration information from %s: %s" %
8961 logging.error(log_err)
8962 raise errors.OpExecError(log_err)
8964 self.migration_info = migration_info = result.payload
8966 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8967 # Then switch the disks to master/master mode
8968 self._EnsureSecondary(target_node)
8969 self._GoStandalone()
8970 self._GoReconnect(True)
8971 self._WaitUntilSync()
8973 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8974 result = self.rpc.call_accept_instance(target_node,
8977 self.nodes_ip[target_node])
8979 msg = result.fail_msg
8981 logging.error("Instance pre-migration failed, trying to revert"
8982 " disk status: %s", msg)
8983 self.feedback_fn("Pre-migration failed, aborting")
8984 self._AbortMigration()
8985 self._RevertDiskStatus()
8986 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8987 (instance.name, msg))
8989 self.feedback_fn("* migrating instance to %s" % target_node)
8990 result = self.rpc.call_instance_migrate(source_node, instance,
8991 self.nodes_ip[target_node],
8993 msg = result.fail_msg
8995 logging.error("Instance migration failed, trying to revert"
8996 " disk status: %s", msg)
8997 self.feedback_fn("Migration failed, aborting")
8998 self._AbortMigration()
8999 self._RevertDiskStatus()
9000 raise errors.OpExecError("Could not migrate instance %s: %s" %
9001 (instance.name, msg))
9003 self.feedback_fn("* starting memory transfer")
9004 last_feedback = time.time()
9006 result = self.rpc.call_instance_get_migration_status(source_node,
9008 msg = result.fail_msg
9009 ms = result.payload # MigrationStatus instance
9010 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9011 logging.error("Instance migration failed, trying to revert"
9012 " disk status: %s", msg)
9013 self.feedback_fn("Migration failed, aborting")
9014 self._AbortMigration()
9015 self._RevertDiskStatus()
9017 msg = "hypervisor returned failure"
9018 raise errors.OpExecError("Could not migrate instance %s: %s" %
9019 (instance.name, msg))
9021 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9022 self.feedback_fn("* memory transfer complete")
9025 if (utils.TimeoutExpired(last_feedback,
9026 self._MIGRATION_FEEDBACK_INTERVAL) and
9027 ms.transferred_ram is not None):
9028 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9029 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
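# Editor's note (worked example): with ms.transferred_ram == 1536 and
# ms.total_ram == 2048 the line above reports
# "* memory transfer progress: 75.00 %".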
9030 last_feedback = time.time()
9032 time.sleep(self._MIGRATION_POLL_INTERVAL)
9034 result = self.rpc.call_instance_finalize_migration_src(source_node,
9038 msg = result.fail_msg
9040 logging.error("Instance migration succeeded, but finalization failed"
9041 " on the source node: %s", msg)
9042 raise errors.OpExecError("Could not finalize instance migration: %s" %
9045 instance.primary_node = target_node
9047 # distribute new instance config to the other nodes
9048 self.cfg.Update(instance, self.feedback_fn)
9050 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9054 msg = result.fail_msg
9056 logging.error("Instance migration succeeded, but finalization failed"
9057 " on the target node: %s", msg)
9058 raise errors.OpExecError("Could not finalize instance migration: %s" %
9061 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9062 self._EnsureSecondary(source_node)
9063 self._WaitUntilSync()
9064 self._GoStandalone()
9065 self._GoReconnect(False)
9066 self._WaitUntilSync()
9068 # If the instance's disk template is `rbd' or `ext' and there was a
9069 # successful migration, unmap the device from the source node.
9070 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9071 disks = _ExpandCheckDisks(instance, instance.disks)
9072 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9074 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9075 msg = result.fail_msg
9077 logging.error("Migration was successful, but couldn't unmap the"
9078 " block device %s on source node %s: %s",
9079 disk.iv_name, source_node, msg)
9080 logging.error("You need to unmap the device %s manually on %s",
9081 disk.iv_name, source_node)
9083 self.feedback_fn("* done")
9085 def _ExecFailover(self):
9086 """Failover an instance.
9088 The failover is done by shutting it down on its present node and
9089 starting it on the secondary.
9092 instance = self.instance
9093 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9095 source_node = instance.primary_node
9096 target_node = self.target_node
9098 if instance.admin_state == constants.ADMINST_UP:
9099 self.feedback_fn("* checking disk consistency between source and target")
9100 for (idx, dev) in enumerate(instance.disks):
9101 # for drbd, these are drbd over lvm
9102 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9104 if primary_node.offline:
9105 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9107 (primary_node.name, idx, target_node))
9108 elif not self.ignore_consistency:
9109 raise errors.OpExecError("Disk %s is degraded on target node,"
9110 " aborting failover" % idx)
9112 self.feedback_fn("* not checking disk consistency as instance is not"
9115 self.feedback_fn("* shutting down instance on source node")
9116 logging.info("Shutting down instance %s on node %s",
9117 instance.name, source_node)
9119 result = self.rpc.call_instance_shutdown(source_node, instance,
9120 self.shutdown_timeout)
9121 msg = result.fail_msg
9123 if self.ignore_consistency or primary_node.offline:
9124 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9125 " proceeding anyway; please make sure node"
9126 " %s is down; error details: %s",
9127 instance.name, source_node, source_node, msg)
9129 raise errors.OpExecError("Could not shutdown instance %s on"
9131 (instance.name, source_node, msg))
9133 self.feedback_fn("* deactivating the instance's disks on source node")
9134 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9135 raise errors.OpExecError("Can't shut down the instance's disks")
9137 instance.primary_node = target_node
9138 # distribute new instance config to the other nodes
9139 self.cfg.Update(instance, self.feedback_fn)
9141 # Only start the instance if it's marked as up
9142 if instance.admin_state == constants.ADMINST_UP:
9143 self.feedback_fn("* activating the instance's disks on target node %s" %
9145 logging.info("Starting instance %s on node %s",
9146 instance.name, target_node)
9148 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9149 ignore_secondaries=True)
9151 _ShutdownInstanceDisks(self.lu, instance)
9152 raise errors.OpExecError("Can't activate the instance's disks")
9154 self.feedback_fn("* starting the instance on the target node %s" %
9156 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9158 msg = result.fail_msg
9160 _ShutdownInstanceDisks(self.lu, instance)
9161 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9162 (instance.name, target_node, msg))
9164 def Exec(self, feedback_fn):
9165 """Perform the migration.
9168 self.feedback_fn = feedback_fn
9169 self.source_node = self.instance.primary_node
9171 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9172 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9173 self.target_node = self.instance.secondary_nodes[0]
9174 # Otherwise self.target_node has been populated either
9175 # directly, or through an iallocator.
9177 self.all_nodes = [self.source_node, self.target_node]
9178 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9179 in self.cfg.GetMultiNodeInfo(self.all_nodes))
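# Editor's note (illustrative, addresses hypothetical): nodes_ip is a plain
# name -> secondary-IP mapping, e.g.
#   {"node1.example.com": "192.0.2.11", "node2.example.com": "192.0.2.12"}
# which the DRBD and accept-instance RPC calls above use to address the peers.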
9182 feedback_fn("Failover instance %s" % self.instance.name)
9183 self._ExecFailover()
9185 feedback_fn("Migrating instance %s" % self.instance.name)
9188 return self._ExecCleanup()
9190 return self._ExecMigration()
9193 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9195 """Wrapper around L{_CreateBlockDevInner}.
9197 This method annotates the root device first.
9200 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9201 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9202 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9203 force_open, excl_stor)
9206 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9207 info, force_open, excl_stor):
9208 """Create a tree of block devices on a given node.
9210 If this device type has to be created on secondaries, create it and all its children.
9213 If not, just recurse to children keeping the same 'force' value.
9215 @attention: The device has to be annotated already.
9217 @param lu: the lu on whose behalf we execute
9218 @param node: the node on which to create the device
9219 @type instance: L{objects.Instance}
9220 @param instance: the instance which owns the device
9221 @type device: L{objects.Disk}
9222 @param device: the device to create
9223 @type force_create: boolean
9224 @param force_create: whether to force creation of this device; this
9225 will be changed to True whenever we find a device which has
9226 CreateOnSecondary() attribute
9227 @param info: the extra 'metadata' we should attach to the device
9228 (this will be represented as a LVM tag)
9229 @type force_open: boolean
9230 @param force_open: this parameter will be passed to the
9231 L{backend.BlockdevCreate} function where it specifies
9232 whether we run on primary or not, and it affects both
9233 the child assembly and the device's own Open() execution
9234 @type excl_stor: boolean
9235 @param excl_stor: Whether exclusive_storage is active for the node
9238 if device.CreateOnSecondary():
9242 for child in device.children:
9243 _CreateBlockDevInner(lu, node, instance, child, force_create,
9244 info, force_open, excl_stor)
9246 if not force_create:
9249 _CreateSingleBlockDev(lu, node, instance, device, info, force_open, excl_stor)
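# --- Editor's illustrative sketch (not part of the original module) ---------
# The recursion above is easier to see with plain data: force_create flips to
# True as soon as a device that must also exist on secondary nodes is reached,
# children are handled before their parent, and devices reached with
# force_create still False are skipped.  The _ToyDev class and
# _collect_creations helper below are hypothetical and exist only for this
# sketch.
class _ToyDev(object):
  def __init__(self, name, on_secondary, children=None):
    self.name = name
    self._on_secondary = on_secondary
    self.children = children or []

  def CreateOnSecondary(self):
    return self._on_secondary


def _collect_creations(dev, force_create, created):
  """Mimics the recursion above: children first, then the device itself."""
  if dev.CreateOnSecondary():
    force_create = True
  for child in dev.children:
    _collect_creations(child, force_create, created)
  if not force_create:
    return
  created.append(dev.name)

# Example: a mirrored device analogous to DRBD forces creation of its two
# LV-like children as well:
#   created = []
#   _collect_creations(_ToyDev("drbd/0", True,
#                              [_ToyDev("data", False), _ToyDev("meta", False)]),
#                      False, created)
#   created == ["data", "meta", "drbd/0"]
# -----------------------------------------------------------------------------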
9253 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9255 """Create a single block device on a given node.
9257 This will not recurse over children of the device, so they must be created in advance.
9260 @param lu: the lu on whose behalf we execute
9261 @param node: the node on which to create the device
9262 @type instance: L{objects.Instance}
9263 @param instance: the instance which owns the device
9264 @type device: L{objects.Disk}
9265 @param device: the device to create
9266 @param info: the extra 'metadata' we should attach to the device
9267 (this will be represented as a LVM tag)
9268 @type force_open: boolean
9269 @param force_open: this parameter will be passed to the
9270 L{backend.BlockdevCreate} function where it specifies
9271 whether we run on primary or not, and it affects both
9272 the child assembly and the device's own Open() execution
9273 @type excl_stor: boolean
9274 @param excl_stor: Whether exclusive_storage is active for the node
9277 lu.cfg.SetDiskID(device, node)
9278 result = lu.rpc.call_blockdev_create(node, device, device.size,
9279 instance.name, force_open, info,
9281 result.Raise("Can't create block device %s on"
9282 " node %s for instance %s" % (device, node, instance.name))
9283 if device.physical_id is None:
9284 device.physical_id = result.payload
9287 def _GenerateUniqueNames(lu, exts):
9288 """Generate a suitable LV name.
9290 This will generate a logical volume name for the given instance.
9295 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9296 results.append("%s%s" % (new_id, val))
9300 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9301 iv_name, p_minor, s_minor):
9302 """Generate a drbd8 device complete with its children.
9305 assert len(vgnames) == len(names) == 2
9306 port = lu.cfg.AllocatePort()
9307 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9309 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9310 logical_id=(vgnames[0], names[0]),
9312 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9313 size=constants.DRBD_META_SIZE,
9314 logical_id=(vgnames[1], names[1]),
9316 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9317 logical_id=(primary, secondary, port,
9320 children=[dev_data, dev_meta],
9321 iv_name=iv_name, params={})
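# Illustrative sketch (volume group, names and minors assumed, not part of the
# original module): a call like
#   _GenerateDRBD8Branch(lu, "node1", "node2", 10240, ["xenvg", "xenvg"],
#                        ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
#                        "disk/0", 0, 1)
# builds one LD_DRBD8 device of 10240 MiB whose two LD_LV children are the
# data volume "<uuid>.disk0_data" and the DRBD_META_SIZE metadata volume
# "<uuid>.disk0_meta", both placed in volume group "xenvg", with the allocated
# port and generated shared secret stored in the DRBD logical_id.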
9325 _DISK_TEMPLATE_NAME_PREFIX = {
9326 constants.DT_PLAIN: "",
9327 constants.DT_RBD: ".rbd",
9328 constants.DT_EXT: ".ext",
9332 _DISK_TEMPLATE_DEVICE_TYPE = {
9333 constants.DT_PLAIN: constants.LD_LV,
9334 constants.DT_FILE: constants.LD_FILE,
9335 constants.DT_SHARED_FILE: constants.LD_FILE,
9336 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9337 constants.DT_RBD: constants.LD_RBD,
9338 constants.DT_EXT: constants.LD_EXT,
9342 def _GenerateDiskTemplate(
9343 lu, template_name, instance_name, primary_node, secondary_nodes,
9344 disk_info, file_storage_dir, file_driver, base_index,
9345 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9346 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9347 """Generate the entire disk layout for a given template type.
9350 vgname = lu.cfg.GetVGName()
9351 disk_count = len(disk_info)
9354 if template_name == constants.DT_DISKLESS:
9356 elif template_name == constants.DT_DRBD8:
9357 if len(secondary_nodes) != 1:
9358 raise errors.ProgrammerError("Wrong template configuration")
9359 remote_node = secondary_nodes[0]
9360 minors = lu.cfg.AllocateDRBDMinor(
9361 [primary_node, remote_node] * len(disk_info), instance_name)
9363 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9365 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9368 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9369 for i in range(disk_count)]):
9370 names.append(lv_prefix + "_data")
9371 names.append(lv_prefix + "_meta")
9372 for idx, disk in enumerate(disk_info):
9373 disk_index = idx + base_index
9374 data_vg = disk.get(constants.IDISK_VG, vgname)
9375 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9376 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9377 disk[constants.IDISK_SIZE],
9379 names[idx * 2:idx * 2 + 2],
9380 "disk/%d" % disk_index,
9381 minors[idx * 2], minors[idx * 2 + 1])
9382 disk_dev.mode = disk[constants.IDISK_MODE]
9383 disks.append(disk_dev)
9386 raise errors.ProgrammerError("Wrong template configuration")
9388 if template_name == constants.DT_FILE:
9390 elif template_name == constants.DT_SHARED_FILE:
9391 _req_shr_file_storage()
9393 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9394 if name_prefix is None:
9397 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9398 (name_prefix, base_index + i)
9399 for i in range(disk_count)])
9401 if template_name == constants.DT_PLAIN:
9403 def logical_id_fn(idx, _, disk):
9404 vg = disk.get(constants.IDISK_VG, vgname)
9405 return (vg, names[idx])
9407 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9409 lambda _, disk_index, disk: (file_driver,
9410 "%s/disk%d" % (file_storage_dir,
9412 elif template_name == constants.DT_BLOCK:
9414 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9415 disk[constants.IDISK_ADOPT])
9416 elif template_name == constants.DT_RBD:
9417 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9418 elif template_name == constants.DT_EXT:
9419 def logical_id_fn(idx, _, disk):
9420 provider = disk.get(constants.IDISK_PROVIDER, None)
9421 if provider is None:
9422 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9423 " not found", constants.DT_EXT,
9424 constants.IDISK_PROVIDER)
9425 return (provider, names[idx])
9427 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9429 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9431 for idx, disk in enumerate(disk_info):
9433 # Only for the Ext template add disk_info to params
9434 if template_name == constants.DT_EXT:
9435 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9437 if key not in constants.IDISK_PARAMS:
9438 params[key] = disk[key]
9439 disk_index = idx + base_index
9440 size = disk[constants.IDISK_SIZE]
9441 feedback_fn("* disk %s, size %s" %
9442 (disk_index, utils.FormatUnit(size, "h")))
9443 disks.append(objects.Disk(dev_type=dev_type, size=size,
9444 logical_id=logical_id_fn(idx, disk_index, disk),
9445 iv_name="disk/%d" % disk_index,
9446 mode=disk[constants.IDISK_MODE],
9452 def _GetInstanceInfoText(instance):
9453 """Compute that text that should be added to the disk's metadata.
9456 return "originstname+%s" % instance.name
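# Illustrative example (instance name assumed): for an instance named
# "web1.example.com" the text above, which ends up as an LVM tag on the
# instance's volumes, would be "originstname+web1.example.com".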
9459 def _CalcEta(time_taken, written, total_size):
9460 """Calculates the ETA based on size written and total size.
9462 @param time_taken: The time taken so far
9463 @param written: amount written so far
9464 @param total_size: The total size of data to be written
9465 @return: The remaining time in seconds
9468 avg_time = time_taken / float(written)
9469 return (total_size - written) * avg_time
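# Illustrative sketch of the linear extrapolation above: if 256 MiB were
# written in 30 seconds, the average time per MiB is 30/256 s, so for a
# 1024 MiB total the remaining 768 MiB are estimated at 768 * 30/256 s:
#
#   >>> _CalcEta(30, 256, 1024)
#   90.0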
9472 def _WipeDisks(lu, instance, disks=None):
9473 """Wipes instance disks.
9475 @type lu: L{LogicalUnit}
9476 @param lu: the logical unit on whose behalf we execute
9477 @type instance: L{objects.Instance}
9478   @param instance: the instance whose disks we should wipe
9479 @return: the success of the wipe
9482 node = instance.primary_node
9485 disks = [(idx, disk, 0)
9486 for (idx, disk) in enumerate(instance.disks)]
9488 for (_, device, _) in disks:
9489 lu.cfg.SetDiskID(device, node)
9491 logging.info("Pausing synchronization of disks of instance '%s'",
9493 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9494 (map(compat.snd, disks),
9497 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9499 for idx, success in enumerate(result.payload):
9501 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9502 " failed", idx, instance.name)
9505 for (idx, device, offset) in disks:
9506 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9507 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9509 int(min(constants.MAX_WIPE_CHUNK,
9510 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9514 start_time = time.time()
9519 info_text = (" (from %s to %s)" %
9520 (utils.FormatUnit(offset, "h"),
9521 utils.FormatUnit(size, "h")))
9523 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9525 logging.info("Wiping disk %d for instance %s on node %s using"
9526 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9528 while offset < size:
9529 wipe_size = min(wipe_chunk_size, size - offset)
9531 logging.debug("Wiping disk %d, offset %s, chunk %s",
9532 idx, offset, wipe_size)
9534 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9536 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9537 (idx, offset, wipe_size))
9541 if now - last_output >= 60:
9542 eta = _CalcEta(now - start_time, offset, size)
9543 lu.LogInfo(" - done: %.1f%% ETA: %s",
9544 offset / float(size) * 100, utils.FormatSeconds(eta))
9547 logging.info("Resuming synchronization of disks for instance '%s'",
9550 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9551 (map(compat.snd, disks),
9556 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9557 node, result.fail_msg)
9559 for idx, success in enumerate(result.payload):
9561 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9562 " failed", idx, instance.name)
9565 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9566 """Create all disks for an instance.
9568 This abstracts away some work from AddInstance.
9570 @type lu: L{LogicalUnit}
9571 @param lu: the logical unit on whose behalf we execute
9572 @type instance: L{objects.Instance}
9573 @param instance: the instance whose disks we should create
9575 @param to_skip: list of indices to skip
9576 @type target_node: string
9577 @param target_node: if passed, overrides the target node for creation
9579 @return: the success of the creation
9582 info = _GetInstanceInfoText(instance)
9583 if target_node is None:
9584 pnode = instance.primary_node
9585 all_nodes = instance.all_nodes
9590 if instance.disk_template in constants.DTS_FILEBASED:
9591 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9592 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9594 result.Raise("Failed to create directory '%s' on"
9595 " node %s" % (file_storage_dir, pnode))
9597 # Note: this needs to be kept in sync with adding of disks in
9598 # LUInstanceSetParams
9599 for idx, device in enumerate(instance.disks):
9600 if to_skip and idx in to_skip:
9602 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9604 for node in all_nodes:
9605 f_create = node == pnode
9606 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9609 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9610 """Remove all disks for an instance.
9612 This abstracts away some work from `AddInstance()` and
9613 `RemoveInstance()`. Note that in case some of the devices couldn't
9614 be removed, the removal will continue with the other ones (compare
9615 with `_CreateDisks()`).
9617 @type lu: L{LogicalUnit}
9618 @param lu: the logical unit on whose behalf we execute
9619 @type instance: L{objects.Instance}
9620 @param instance: the instance whose disks we should remove
9621 @type target_node: string
9622 @param target_node: used to override the node on which to remove the disks
9624 @return: the success of the removal
9627 logging.info("Removing block devices for instance %s", instance.name)
9630 ports_to_release = set()
9631 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9632 for (idx, device) in enumerate(anno_disks):
9634 edata = [(target_node, device)]
9636 edata = device.ComputeNodeTree(instance.primary_node)
9637 for node, disk in edata:
9638 lu.cfg.SetDiskID(disk, node)
9639 result = lu.rpc.call_blockdev_remove(node, disk)
9641 lu.LogWarning("Could not remove disk %s on node %s,"
9642 " continuing anyway: %s", idx, node, result.fail_msg)
9643 if not (result.offline and node != instance.primary_node):
9646 # if this is a DRBD disk, return its port to the pool
9647 if device.dev_type in constants.LDS_DRBD:
9648 ports_to_release.add(device.logical_id[2])
9650 if all_result or ignore_failures:
9651 for port in ports_to_release:
9652 lu.cfg.AddTcpUdpPort(port)
9654 if instance.disk_template in constants.DTS_FILEBASED:
9655 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9659 tgt = instance.primary_node
9660 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9662 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9663 file_storage_dir, instance.primary_node, result.fail_msg)
9669 def _ComputeDiskSizePerVG(disk_template, disks):
9670 """Compute disk size requirements in the volume group
9673 def _compute(disks, payload):
9674 """Universal algorithm.
9679       vgs[disk[constants.IDISK_VG]] = (vgs.get(disk[constants.IDISK_VG], 0) +
9680                                        disk[constants.IDISK_SIZE] + payload)
9684 # Required free disk space as a function of disk and swap space
9686 constants.DT_DISKLESS: {},
9687 constants.DT_PLAIN: _compute(disks, 0),
9688 # 128 MB are added for drbd metadata for each disk
9689 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9690 constants.DT_FILE: {},
9691 constants.DT_SHARED_FILE: {},
9694 if disk_template not in req_size_dict:
9695 raise errors.ProgrammerError("Disk template '%s' size requirement"
9696 " is unknown" % disk_template)
9698 return req_size_dict[disk_template]
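# Illustrative sketch (volume group name assumed): for two disks of 1024 MiB
# and 2048 MiB, both in volume group "xenvg", _ComputeDiskSizePerVG returns
# {"xenvg": 3072} for DT_PLAIN and, with DRBD_META_SIZE (128 MiB) added once
# per disk, {"xenvg": 3328} for DT_DRBD8; file-based and diskless templates
# need no volume group space at all.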
9701 def _FilterVmNodes(lu, nodenames):
9702 """Filters out non-vm_capable nodes from a list.
9704 @type lu: L{LogicalUnit}
9705 @param lu: the logical unit for which we check
9706 @type nodenames: list
9707 @param nodenames: the list of nodes on which we should check
9709 @return: the list of vm-capable nodes
9712   non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9713   return [name for name in nodenames if name not in non_vm_nodes]
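# Illustrative example (node names assumed): with nodenames = ["node1",
# "node2", "node3"] and "node2" the only non-vm_capable node in the cluster,
# _FilterVmNodes returns ["node1", "node3"].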
9716 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9717 """Hypervisor parameter validation.
9719 This function abstract the hypervisor parameter validation to be
9720 used in both instance create and instance modify.
9722 @type lu: L{LogicalUnit}
9723 @param lu: the logical unit for which we check
9724 @type nodenames: list
9725 @param nodenames: the list of nodes on which we should check
9726 @type hvname: string
9727 @param hvname: the name of the hypervisor we should use
9728 @type hvparams: dict
9729 @param hvparams: the parameters which we need to check
9730 @raise errors.OpPrereqError: if the parameters are not valid
9733 nodenames = _FilterVmNodes(lu, nodenames)
9735 cluster = lu.cfg.GetClusterInfo()
9736 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9738 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9739 for node in nodenames:
9743 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9746 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9747 """OS parameters validation.
9749 @type lu: L{LogicalUnit}
9750 @param lu: the logical unit for which we check
9751 @type required: boolean
9752 @param required: whether the validation should fail if the OS is not
9754 @type nodenames: list
9755 @param nodenames: the list of nodes on which we should check
9756 @type osname: string
9757   @param osname: the name of the OS we should use
9758 @type osparams: dict
9759 @param osparams: the parameters which we need to check
9760 @raise errors.OpPrereqError: if the parameters are not valid
9763 nodenames = _FilterVmNodes(lu, nodenames)
9764 result = lu.rpc.call_os_validate(nodenames, required, osname,
9765 [constants.OS_VALIDATE_PARAMETERS],
9767 for node, nres in result.items():
9768 # we don't check for offline cases since this should be run only
9769 # against the master node and/or an instance's nodes
9770 nres.Raise("OS Parameters validation failed on node %s" % node)
9771 if not nres.payload:
9772 lu.LogInfo("OS %s not found on node %s, validation skipped",
9776 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9777 """Wrapper around IAReqInstanceAlloc.
9779 @param op: The instance opcode
9780 @param disks: The computed disks
9781 @param nics: The computed nics
9782   @param beparams: The fully filled beparams
9783 @param node_whitelist: List of nodes which should appear as online to the
9784 allocator (unless the node is already marked offline)
9786 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9789 spindle_use = beparams[constants.BE_SPINDLE_USE]
9790 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9791 disk_template=op.disk_template,
9794 vcpus=beparams[constants.BE_VCPUS],
9795 memory=beparams[constants.BE_MAXMEM],
9796 spindle_use=spindle_use,
9798 nics=[n.ToDict() for n in nics],
9799 hypervisor=op.hypervisor,
9800 node_whitelist=node_whitelist)
9803 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9804 """Computes the nics.
9806 @param op: The instance opcode
9807 @param cluster: Cluster configuration object
9808 @param default_ip: The default ip to assign
9809 @param cfg: An instance of the configuration object
9810 @param ec_id: Execution context ID
9812   @returns: The built up NICs
9817 nic_mode_req = nic.get(constants.INIC_MODE, None)
9818 nic_mode = nic_mode_req
9819 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9820 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9822 net = nic.get(constants.INIC_NETWORK, None)
9823 link = nic.get(constants.NIC_LINK, None)
9824 ip = nic.get(constants.INIC_IP, None)
9826 if net is None or net.lower() == constants.VALUE_NONE:
9829 if nic_mode_req is not None or link is not None:
9830 raise errors.OpPrereqError("If network is given, no mode or link"
9831 " is allowed to be passed",
9834 # ip validity checks
9835 if ip is None or ip.lower() == constants.VALUE_NONE:
9837 elif ip.lower() == constants.VALUE_AUTO:
9838 if not op.name_check:
9839 raise errors.OpPrereqError("IP address set to auto but name checks"
9840 " have been skipped",
9844 # We defer pool operations until later, so that the iallocator has
9845         # filled in the instance's node(s)
9846 if ip.lower() == constants.NIC_IP_POOL:
9848 raise errors.OpPrereqError("if ip=pool, parameter network"
9849 " must be passed too",
9852 elif not netutils.IPAddress.IsValid(ip):
9853 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9858 # TODO: check the ip address for uniqueness
9859 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9860 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9863 # MAC address verification
9864 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9865 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9866 mac = utils.NormalizeAndValidateMac(mac)
9869 # TODO: We need to factor this out
9870 cfg.ReserveMAC(mac, ec_id)
9871 except errors.ReservationError:
9872 raise errors.OpPrereqError("MAC address %s already in use"
9873 " in cluster" % mac,
9874 errors.ECODE_NOTUNIQUE)
9876 # Build nic parameters
9879 nicparams[constants.NIC_MODE] = nic_mode
9881 nicparams[constants.NIC_LINK] = link
9883 check_params = cluster.SimpleFillNIC(nicparams)
9884 objects.NIC.CheckParameterSyntax(check_params)
9885 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9886 network=net, nicparams=nicparams))
9891 def _ComputeDisks(op, default_vg):
9892 """Computes the instance disks.
9894 @param op: The instance opcode
9895 @param default_vg: The default_vg to assume
9897 @return: The computed disks
9901 for disk in op.disks:
9902 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9903 if mode not in constants.DISK_ACCESS_SET:
9904 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9905 mode, errors.ECODE_INVAL)
9906 size = disk.get(constants.IDISK_SIZE, None)
9908 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9911 except (TypeError, ValueError):
9912 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9915 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9916 if ext_provider and op.disk_template != constants.DT_EXT:
9917 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9918 " disk template, not %s" %
9919 (constants.IDISK_PROVIDER, constants.DT_EXT,
9920 op.disk_template), errors.ECODE_INVAL)
9922 data_vg = disk.get(constants.IDISK_VG, default_vg)
9924 constants.IDISK_SIZE: size,
9925 constants.IDISK_MODE: mode,
9926 constants.IDISK_VG: data_vg,
9929 if constants.IDISK_METAVG in disk:
9930 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9931 if constants.IDISK_ADOPT in disk:
9932 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9934 # For extstorage, demand the `provider' option and add any
9935 # additional parameters (ext-params) to the dict
9936 if op.disk_template == constants.DT_EXT:
9938 new_disk[constants.IDISK_PROVIDER] = ext_provider
9940 if key not in constants.IDISK_PARAMS:
9941 new_disk[key] = disk[key]
9943 raise errors.OpPrereqError("Missing provider for template '%s'" %
9944 constants.DT_EXT, errors.ECODE_INVAL)
9946 disks.append(new_disk)
9951 def _ComputeFullBeParams(op, cluster):
9952 """Computes the full beparams.
9954 @param op: The instance opcode
9955 @param cluster: The cluster config object
9957 @return: The fully filled beparams
9960 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9961 for param, value in op.beparams.iteritems():
9962 if value == constants.VALUE_AUTO:
9963 op.beparams[param] = default_beparams[param]
9964 objects.UpgradeBeParams(op.beparams)
9965 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9966 return cluster.SimpleFillBE(op.beparams)
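# Illustrative sketch (parameter values assumed): if the opcode carries
# beparams = {"maxmem": "auto", "vcpus": 2} and the cluster default for
# maxmem is 1024, the "auto" value is first replaced by that default,
# UpgradeBeParams normalizes any legacy "memory" setting into minmem/maxmem,
# and SimpleFillBE then merges in the remaining cluster defaults, so the
# returned dict has maxmem = 1024, vcpus = 2 and cluster values for the rest.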
9969 def _CheckOpportunisticLocking(op):
9970 """Generate error if opportunistic locking is not possible.
9973 if op.opportunistic_locking and not op.iallocator:
9974 raise errors.OpPrereqError("Opportunistic locking is only available in"
9975 " combination with an instance allocator",
9979 class LUInstanceCreate(LogicalUnit):
9980 """Create an instance.
9983 HPATH = "instance-add"
9984 HTYPE = constants.HTYPE_INSTANCE
9987 def CheckArguments(self):
9991 # do not require name_check to ease forward/backward compatibility
9993 if self.op.no_install and self.op.start:
9994 self.LogInfo("No-installation mode selected, disabling startup")
9995 self.op.start = False
9996 # validate/normalize the instance name
9997 self.op.instance_name = \
9998 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10000 if self.op.ip_check and not self.op.name_check:
10001 # TODO: make the ip check more flexible and not depend on the name check
10002 raise errors.OpPrereqError("Cannot do IP address check without a name"
10003 " check", errors.ECODE_INVAL)
10005 # check nics' parameter names
10006 for nic in self.op.nics:
10007 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10009 # check disks. parameter names and consistent adopt/no-adopt strategy
10010 has_adopt = has_no_adopt = False
10011 for disk in self.op.disks:
10012 if self.op.disk_template != constants.DT_EXT:
10013 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10014 if constants.IDISK_ADOPT in disk:
10017 has_no_adopt = True
10018 if has_adopt and has_no_adopt:
10019 raise errors.OpPrereqError("Either all disks are adopted or none is",
10020 errors.ECODE_INVAL)
10022 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10023 raise errors.OpPrereqError("Disk adoption is not supported for the"
10024 " '%s' disk template" %
10025 self.op.disk_template,
10026 errors.ECODE_INVAL)
10027 if self.op.iallocator is not None:
10028 raise errors.OpPrereqError("Disk adoption not allowed with an"
10029 " iallocator script", errors.ECODE_INVAL)
10030 if self.op.mode == constants.INSTANCE_IMPORT:
10031 raise errors.OpPrereqError("Disk adoption not allowed for"
10032 " instance import", errors.ECODE_INVAL)
10034 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10035 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10036 " but no 'adopt' parameter given" %
10037 self.op.disk_template,
10038 errors.ECODE_INVAL)
10040 self.adopt_disks = has_adopt
10042 # instance name verification
10043 if self.op.name_check:
10044 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10045 self.op.instance_name = self.hostname1.name
10046 # used in CheckPrereq for ip ping check
10047 self.check_ip = self.hostname1.ip
10049 self.check_ip = None
10051 # file storage checks
10052 if (self.op.file_driver and
10053 not self.op.file_driver in constants.FILE_DRIVER):
10054 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10055 self.op.file_driver, errors.ECODE_INVAL)
10057 if self.op.disk_template == constants.DT_FILE:
10058 opcodes.RequireFileStorage()
10059 elif self.op.disk_template == constants.DT_SHARED_FILE:
10060 opcodes.RequireSharedFileStorage()
10062 ### Node/iallocator related checks
10063 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10065 if self.op.pnode is not None:
10066 if self.op.disk_template in constants.DTS_INT_MIRROR:
10067 if self.op.snode is None:
10068 raise errors.OpPrereqError("The networked disk templates need"
10069 " a mirror node", errors.ECODE_INVAL)
10070 elif self.op.snode:
10071 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10073 self.op.snode = None
10075 _CheckOpportunisticLocking(self.op)
10077 self._cds = _GetClusterDomainSecret()
10079 if self.op.mode == constants.INSTANCE_IMPORT:
10080 # On import force_variant must be True, because if we forced it at
10081 # initial install, our only chance when importing it back is that it
10083 self.op.force_variant = True
10085 if self.op.no_install:
10086 self.LogInfo("No-installation mode has no effect during import")
10088 elif self.op.mode == constants.INSTANCE_CREATE:
10089 if self.op.os_type is None:
10090 raise errors.OpPrereqError("No guest OS specified",
10091 errors.ECODE_INVAL)
10092 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10093 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10094 " installation" % self.op.os_type,
10095 errors.ECODE_STATE)
10096 if self.op.disk_template is None:
10097 raise errors.OpPrereqError("No disk template specified",
10098 errors.ECODE_INVAL)
10100 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10101 # Check handshake to ensure both clusters have the same domain secret
10102 src_handshake = self.op.source_handshake
10103 if not src_handshake:
10104 raise errors.OpPrereqError("Missing source handshake",
10105 errors.ECODE_INVAL)
10107 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10110 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10111 errors.ECODE_INVAL)
10113 # Load and check source CA
10114 self.source_x509_ca_pem = self.op.source_x509_ca
10115 if not self.source_x509_ca_pem:
10116 raise errors.OpPrereqError("Missing source X509 CA",
10117 errors.ECODE_INVAL)
10120 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10122 except OpenSSL.crypto.Error, err:
10123 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10124 (err, ), errors.ECODE_INVAL)
10126 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10127 if errcode is not None:
10128 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10129 errors.ECODE_INVAL)
10131 self.source_x509_ca = cert
10133 src_instance_name = self.op.source_instance_name
10134 if not src_instance_name:
10135 raise errors.OpPrereqError("Missing source instance name",
10136 errors.ECODE_INVAL)
10138 self.source_instance_name = \
10139 netutils.GetHostname(name=src_instance_name).name
10142 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10143 self.op.mode, errors.ECODE_INVAL)
10145 def ExpandNames(self):
10146 """ExpandNames for CreateInstance.
10148 Figure out the right locks for instance creation.
10151 self.needed_locks = {}
10153 instance_name = self.op.instance_name
10154 # this is just a preventive check, but someone might still add this
10155 # instance in the meantime, and creation will fail at lock-add time
10156 if instance_name in self.cfg.GetInstanceList():
10157 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10158 instance_name, errors.ECODE_EXISTS)
10160 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10162 if self.op.iallocator:
10163 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10164 # specifying a group on instance creation and then selecting nodes from
10166 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10167 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10169 if self.op.opportunistic_locking:
10170 self.opportunistic_locks[locking.LEVEL_NODE] = True
10171 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10173 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10174 nodelist = [self.op.pnode]
10175 if self.op.snode is not None:
10176 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10177 nodelist.append(self.op.snode)
10178 self.needed_locks[locking.LEVEL_NODE] = nodelist
10180 # in case of import lock the source node too
10181 if self.op.mode == constants.INSTANCE_IMPORT:
10182 src_node = self.op.src_node
10183 src_path = self.op.src_path
10185 if src_path is None:
10186 self.op.src_path = src_path = self.op.instance_name
10188 if src_node is None:
10189 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10190 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10191 self.op.src_node = None
10192 if os.path.isabs(src_path):
10193 raise errors.OpPrereqError("Importing an instance from a path"
10194 " requires a source node option",
10195 errors.ECODE_INVAL)
10197 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10198 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10199 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10200 if not os.path.isabs(src_path):
10201 self.op.src_path = src_path = \
10202 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10204 self.needed_locks[locking.LEVEL_NODE_RES] = \
10205 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10207 def _RunAllocator(self):
10208 """Run the allocator based on input opcode.
10211 if self.op.opportunistic_locking:
10212 # Only consider nodes for which a lock is held
10213 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
10215 node_whitelist = None
10217 #TODO Export network to iallocator so that it chooses a pnode
10218 # in a nodegroup that has the desired network connected to
10219 req = _CreateInstanceAllocRequest(self.op, self.disks,
10220 self.nics, self.be_full,
10222 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10224 ial.Run(self.op.iallocator)
10226 if not ial.success:
10227 # When opportunistic locks are used only a temporary failure is generated
10228 if self.op.opportunistic_locking:
10229 ecode = errors.ECODE_TEMP_NORES
10231 ecode = errors.ECODE_NORES
10233 raise errors.OpPrereqError("Can't compute nodes using"
10234 " iallocator '%s': %s" %
10235 (self.op.iallocator, ial.info),
10238 self.op.pnode = ial.result[0]
10239 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10240 self.op.instance_name, self.op.iallocator,
10241 utils.CommaJoin(ial.result))
10243 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10245 if req.RequiredNodes() == 2:
10246 self.op.snode = ial.result[1]
10248 def BuildHooksEnv(self):
10249 """Build hooks env.
10251 This runs on master, primary and secondary nodes of the instance.
10255 "ADD_MODE": self.op.mode,
10257 if self.op.mode == constants.INSTANCE_IMPORT:
10258 env["SRC_NODE"] = self.op.src_node
10259 env["SRC_PATH"] = self.op.src_path
10260 env["SRC_IMAGES"] = self.src_images
10262 env.update(_BuildInstanceHookEnv(
10263 name=self.op.instance_name,
10264 primary_node=self.op.pnode,
10265 secondary_nodes=self.secondaries,
10266 status=self.op.start,
10267 os_type=self.op.os_type,
10268 minmem=self.be_full[constants.BE_MINMEM],
10269 maxmem=self.be_full[constants.BE_MAXMEM],
10270 vcpus=self.be_full[constants.BE_VCPUS],
10271 nics=_NICListToTuple(self, self.nics),
10272 disk_template=self.op.disk_template,
10273 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10274 for d in self.disks],
10277 hypervisor_name=self.op.hypervisor,
10283 def BuildHooksNodes(self):
10284 """Build hooks nodes.
10287 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10290 def _ReadExportInfo(self):
10291 """Reads the export information from disk.
10293 It will override the opcode source node and path with the actual
10294 information, if these two were not specified before.
10296 @return: the export information
10299 assert self.op.mode == constants.INSTANCE_IMPORT
10301 src_node = self.op.src_node
10302 src_path = self.op.src_path
10304 if src_node is None:
10305 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10306 exp_list = self.rpc.call_export_list(locked_nodes)
10308 for node in exp_list:
10309 if exp_list[node].fail_msg:
10311 if src_path in exp_list[node].payload:
10313 self.op.src_node = src_node = node
10314 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10318 raise errors.OpPrereqError("No export found for relative path %s" %
10319 src_path, errors.ECODE_INVAL)
10321 _CheckNodeOnline(self, src_node)
10322 result = self.rpc.call_export_info(src_node, src_path)
10323 result.Raise("No export or invalid export found in dir %s" % src_path)
10325 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10326 if not export_info.has_section(constants.INISECT_EXP):
10327 raise errors.ProgrammerError("Corrupted export config",
10328 errors.ECODE_ENVIRON)
10330 ei_version = export_info.get(constants.INISECT_EXP, "version")
10331 if (int(ei_version) != constants.EXPORT_VERSION):
10332 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10333 (ei_version, constants.EXPORT_VERSION),
10334 errors.ECODE_ENVIRON)
10337 def _ReadExportParams(self, einfo):
10338 """Use export parameters as defaults.
9340     If the opcode doesn't specify (i.e. override) some instance
9341     parameters, try to take them from the export information, if
9342     it declares them.
10345 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10347 if self.op.disk_template is None:
10348 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10349 self.op.disk_template = einfo.get(constants.INISECT_INS,
10351 if self.op.disk_template not in constants.DISK_TEMPLATES:
10352 raise errors.OpPrereqError("Disk template specified in configuration"
10353 " file is not one of the allowed values:"
10355 " ".join(constants.DISK_TEMPLATES),
10356 errors.ECODE_INVAL)
10358 raise errors.OpPrereqError("No disk template specified and the export"
10359 " is missing the disk_template information",
10360 errors.ECODE_INVAL)
10362 if not self.op.disks:
10364 # TODO: import the disk iv_name too
10365 for idx in range(constants.MAX_DISKS):
10366 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10367 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10368 disks.append({constants.IDISK_SIZE: disk_sz})
10369 self.op.disks = disks
10370 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10371 raise errors.OpPrereqError("No disk info specified and the export"
10372 " is missing the disk information",
10373 errors.ECODE_INVAL)
10375 if not self.op.nics:
10377 for idx in range(constants.MAX_NICS):
10378 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10380 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10381 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10386 self.op.nics = nics
10388 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10389 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10391 if (self.op.hypervisor is None and
10392 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10393 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10395 if einfo.has_section(constants.INISECT_HYP):
10396 # use the export parameters but do not override the ones
10397 # specified by the user
10398 for name, value in einfo.items(constants.INISECT_HYP):
10399 if name not in self.op.hvparams:
10400 self.op.hvparams[name] = value
10402 if einfo.has_section(constants.INISECT_BEP):
10403 # use the parameters, without overriding
10404 for name, value in einfo.items(constants.INISECT_BEP):
10405 if name not in self.op.beparams:
10406 self.op.beparams[name] = value
10407 # Compatibility for the old "memory" be param
10408 if name == constants.BE_MEMORY:
10409 if constants.BE_MAXMEM not in self.op.beparams:
10410 self.op.beparams[constants.BE_MAXMEM] = value
10411 if constants.BE_MINMEM not in self.op.beparams:
10412 self.op.beparams[constants.BE_MINMEM] = value
10414 # try to read the parameters old style, from the main section
10415 for name in constants.BES_PARAMETERS:
10416 if (name not in self.op.beparams and
10417 einfo.has_option(constants.INISECT_INS, name)):
10418 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10420 if einfo.has_section(constants.INISECT_OSP):
10421 # use the parameters, without overriding
10422 for name, value in einfo.items(constants.INISECT_OSP):
10423 if name not in self.op.osparams:
10424 self.op.osparams[name] = value
10426 def _RevertToDefaults(self, cluster):
10427 """Revert the instance parameters to the default values.
10431 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10432 for name in self.op.hvparams.keys():
10433 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10434 del self.op.hvparams[name]
10436 be_defs = cluster.SimpleFillBE({})
10437 for name in self.op.beparams.keys():
10438 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10439 del self.op.beparams[name]
10441 nic_defs = cluster.SimpleFillNIC({})
10442 for nic in self.op.nics:
10443 for name in constants.NICS_PARAMETERS:
10444 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10447 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10448 for name in self.op.osparams.keys():
10449 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10450 del self.op.osparams[name]
10452 def _CalculateFileStorageDir(self):
10453 """Calculate final instance file storage dir.
10456 # file storage dir calculation/check
10457 self.instance_file_storage_dir = None
10458 if self.op.disk_template in constants.DTS_FILEBASED:
10459 # build the full file storage dir path
10462 if self.op.disk_template == constants.DT_SHARED_FILE:
10463 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10465 get_fsd_fn = self.cfg.GetFileStorageDir
10467 cfg_storagedir = get_fsd_fn()
10468 if not cfg_storagedir:
10469 raise errors.OpPrereqError("Cluster file storage dir not defined",
10470 errors.ECODE_STATE)
10471 joinargs.append(cfg_storagedir)
10473 if self.op.file_storage_dir is not None:
10474 joinargs.append(self.op.file_storage_dir)
10476 joinargs.append(self.op.instance_name)
10478 # pylint: disable=W0142
10479 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10481 def CheckPrereq(self): # pylint: disable=R0914
10482 """Check prerequisites.
10485 self._CalculateFileStorageDir()
10487 if self.op.mode == constants.INSTANCE_IMPORT:
10488 export_info = self._ReadExportInfo()
10489 self._ReadExportParams(export_info)
10490 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10492 self._old_instance_name = None
10494 if (not self.cfg.GetVGName() and
10495 self.op.disk_template not in constants.DTS_NOT_LVM):
10496 raise errors.OpPrereqError("Cluster does not support lvm-based"
10497 " instances", errors.ECODE_STATE)
10499 if (self.op.hypervisor is None or
10500 self.op.hypervisor == constants.VALUE_AUTO):
10501 self.op.hypervisor = self.cfg.GetHypervisorType()
10503 cluster = self.cfg.GetClusterInfo()
10504 enabled_hvs = cluster.enabled_hypervisors
10505 if self.op.hypervisor not in enabled_hvs:
10506 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10508 (self.op.hypervisor, ",".join(enabled_hvs)),
10509 errors.ECODE_STATE)
10511 # Check tag validity
10512 for tag in self.op.tags:
10513 objects.TaggableObject.ValidateTag(tag)
10515 # check hypervisor parameter syntax (locally)
10516 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10517 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10519 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10520 hv_type.CheckParameterSyntax(filled_hvp)
10521 self.hv_full = filled_hvp
10522 # check that we don't specify global parameters on an instance
10523 _CheckGlobalHvParams(self.op.hvparams)
10525 # fill and remember the beparams dict
10526 self.be_full = _ComputeFullBeParams(self.op, cluster)
10528 # build os parameters
10529 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10531 # now that hvp/bep are in final format, let's reset to defaults,
10533 if self.op.identify_defaults:
10534 self._RevertToDefaults(cluster)
10537 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10538 self.proc.GetECId())
10540 # disk checks/pre-build
10541 default_vg = self.cfg.GetVGName()
10542 self.disks = _ComputeDisks(self.op, default_vg)
10544 if self.op.mode == constants.INSTANCE_IMPORT:
10546 for idx in range(len(self.disks)):
10547 option = "disk%d_dump" % idx
10548 if export_info.has_option(constants.INISECT_INS, option):
10549 # FIXME: are the old os-es, disk sizes, etc. useful?
10550 export_name = export_info.get(constants.INISECT_INS, option)
10551 image = utils.PathJoin(self.op.src_path, export_name)
10552 disk_images.append(image)
10554 disk_images.append(False)
10556 self.src_images = disk_images
10558 if self.op.instance_name == self._old_instance_name:
10559 for idx, nic in enumerate(self.nics):
10560 if nic.mac == constants.VALUE_AUTO:
10561 nic_mac_ini = "nic%d_mac" % idx
10562 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10564 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10566 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10567 if self.op.ip_check:
10568 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10569 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10570 (self.check_ip, self.op.instance_name),
10571 errors.ECODE_NOTUNIQUE)
10573 #### mac address generation
10574 # By generating here the mac address both the allocator and the hooks get
10575 # the real final mac address rather than the 'auto' or 'generate' value.
10576 # There is a race condition between the generation and the instance object
10577 # creation, which means that we know the mac is valid now, but we're not
10578 # sure it will be when we actually add the instance. If things go bad
10579 # adding the instance will abort because of a duplicate mac, and the
10580 # creation job will fail.
10581 for nic in self.nics:
10582 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10583 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10587 if self.op.iallocator is not None:
10588 self._RunAllocator()
10590 # Release all unneeded node locks
10591 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10592 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10593 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10594 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10596 assert (self.owned_locks(locking.LEVEL_NODE) ==
10597 self.owned_locks(locking.LEVEL_NODE_RES)), \
10598 "Node locks differ from node resource locks"
10600 #### node related checks
10602 # check primary node
10603 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10604 assert self.pnode is not None, \
10605 "Cannot retrieve locked node %s" % self.op.pnode
10607 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10608 pnode.name, errors.ECODE_STATE)
10610 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10611 pnode.name, errors.ECODE_STATE)
10612 if not pnode.vm_capable:
10613 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10614 " '%s'" % pnode.name, errors.ECODE_STATE)
10616 self.secondaries = []
10618 # Fill in any IPs from IP pools. This must happen here, because we need to
10619 # know the nic's primary node, as specified by the iallocator
10620 for idx, nic in enumerate(self.nics):
10622 if net is not None:
10623 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10624 if netparams is None:
10625 raise errors.OpPrereqError("No netparams found for network"
10626 " %s. Propably not connected to"
10627 " node's %s nodegroup" %
10628 (net, self.pnode.name),
10629 errors.ECODE_INVAL)
10630 self.LogInfo("NIC/%d inherits netparams %s" %
10631 (idx, netparams.values()))
10632 nic.nicparams = dict(netparams)
10633 if nic.ip is not None:
10634 if nic.ip.lower() == constants.NIC_IP_POOL:
10636 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10637 except errors.ReservationError:
10638 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10639 " from the address pool" % idx,
10640 errors.ECODE_STATE)
10641 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10644 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10645 except errors.ReservationError:
10646 raise errors.OpPrereqError("IP address %s already in use"
10647 " or does not belong to network %s" %
10649 errors.ECODE_NOTUNIQUE)
10651 # net is None, ip None or given
10652 elif self.op.conflicts_check:
10653 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10655 # mirror node verification
10656 if self.op.disk_template in constants.DTS_INT_MIRROR:
10657 if self.op.snode == pnode.name:
10658 raise errors.OpPrereqError("The secondary node cannot be the"
10659 " primary node", errors.ECODE_INVAL)
10660 _CheckNodeOnline(self, self.op.snode)
10661 _CheckNodeNotDrained(self, self.op.snode)
10662 _CheckNodeVmCapable(self, self.op.snode)
10663 self.secondaries.append(self.op.snode)
10665 snode = self.cfg.GetNodeInfo(self.op.snode)
10666 if pnode.group != snode.group:
10667 self.LogWarning("The primary and secondary nodes are in two"
10668 " different node groups; the disk parameters"
10669 " from the first disk's node group will be"
10672 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10674 if self.op.disk_template in constants.DTS_INT_MIRROR:
10675 nodes.append(snode)
10676 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10677 if compat.any(map(has_es, nodes)):
10678 raise errors.OpPrereqError("Disk template %s not supported with"
10679 " exclusive storage" % self.op.disk_template,
10680 errors.ECODE_STATE)
10682 nodenames = [pnode.name] + self.secondaries
10684 # Verify instance specs
10685 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10687 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10688 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10689 constants.ISPEC_DISK_COUNT: len(self.disks),
10690 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10691 constants.ISPEC_NIC_COUNT: len(self.nics),
10692 constants.ISPEC_SPINDLE_USE: spindle_use,
10695 group_info = self.cfg.GetNodeGroup(pnode.group)
10696 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10697 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10698 if not self.op.ignore_ipolicy and res:
10699 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10700 (pnode.group, group_info.name, utils.CommaJoin(res)))
10701 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10703 if not self.adopt_disks:
10704 if self.op.disk_template == constants.DT_RBD:
10705 # _CheckRADOSFreeSpace() is just a placeholder.
10706 # Any function that checks prerequisites can be placed here.
10707 # Check if there is enough space on the RADOS cluster.
10708 _CheckRADOSFreeSpace()
10709 elif self.op.disk_template == constants.DT_EXT:
10710 # FIXME: Function that checks prereqs if needed
10713 # Check lv size requirements, if not adopting
10714 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10715 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10717 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10718 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10719 disk[constants.IDISK_ADOPT])
10720 for disk in self.disks])
10721 if len(all_lvs) != len(self.disks):
10722 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10723 errors.ECODE_INVAL)
10724 for lv_name in all_lvs:
9726           # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9727           # to ReserveLV use the same syntax
10728 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10729 except errors.ReservationError:
10730 raise errors.OpPrereqError("LV named %s used by another instance" %
10731 lv_name, errors.ECODE_NOTUNIQUE)
10733 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10734 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10736 node_lvs = self.rpc.call_lv_list([pnode.name],
10737 vg_names.payload.keys())[pnode.name]
10738 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10739 node_lvs = node_lvs.payload
10741 delta = all_lvs.difference(node_lvs.keys())
10743 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10744 utils.CommaJoin(delta),
10745 errors.ECODE_INVAL)
10746 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10748 raise errors.OpPrereqError("Online logical volumes found, cannot"
10749 " adopt: %s" % utils.CommaJoin(online_lvs),
10750 errors.ECODE_STATE)
10751 # update the size of disk based on what is found
10752 for dsk in self.disks:
10753 dsk[constants.IDISK_SIZE] = \
10754 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10755 dsk[constants.IDISK_ADOPT])][0]))
10757 elif self.op.disk_template == constants.DT_BLOCK:
10758 # Normalize and de-duplicate device paths
10759 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10760 for disk in self.disks])
10761 if len(all_disks) != len(self.disks):
10762 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10763 errors.ECODE_INVAL)
10764 baddisks = [d for d in all_disks
10765 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10767 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10768 " cannot be adopted" %
10769 (utils.CommaJoin(baddisks),
10770 constants.ADOPTABLE_BLOCKDEV_ROOT),
10771 errors.ECODE_INVAL)
10773 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10774 list(all_disks))[pnode.name]
10775 node_disks.Raise("Cannot get block device information from node %s" %
10777 node_disks = node_disks.payload
10778 delta = all_disks.difference(node_disks.keys())
10780 raise errors.OpPrereqError("Missing block device(s): %s" %
10781 utils.CommaJoin(delta),
10782 errors.ECODE_INVAL)
10783 for dsk in self.disks:
10784 dsk[constants.IDISK_SIZE] = \
10785 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10787 # Verify instance specs
10788 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10790 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10791 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10792 constants.ISPEC_DISK_COUNT: len(self.disks),
10793 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10794 for disk in self.disks],
10795 constants.ISPEC_NIC_COUNT: len(self.nics),
10796 constants.ISPEC_SPINDLE_USE: spindle_use,
10799 group_info = self.cfg.GetNodeGroup(pnode.group)
10800 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10801 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10802 if not self.op.ignore_ipolicy and res:
10803 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10804 " policy: %s") % (pnode.group,
10805 utils.CommaJoin(res)),
10806 errors.ECODE_INVAL)
10808 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10810 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10811 # check OS parameters (remotely)
10812 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10814 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10816 #TODO: _CheckExtParams (remotely)
10817 # Check parameters for extstorage
10819 # memory check on primary node
10820 #TODO(dynmem): use MINMEM for checking
10822 _CheckNodeFreeMemory(self, self.pnode.name,
10823 "creating instance %s" % self.op.instance_name,
10824 self.be_full[constants.BE_MAXMEM],
10825 self.op.hypervisor)
10827 self.dry_run_result = list(nodenames)
10829 def Exec(self, feedback_fn):
10830 """Create and add the instance to the cluster.
10833 instance = self.op.instance_name
10834 pnode_name = self.pnode.name
10836 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10837 self.owned_locks(locking.LEVEL_NODE)), \
10838 "Node locks differ from node resource locks"
10839 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10841 ht_kind = self.op.hypervisor
10842 if ht_kind in constants.HTS_REQ_PORT:
10843 network_port = self.cfg.AllocatePort()
10845 network_port = None
10847     # This is ugly, but we have a chicken-and-egg problem here
10848 # We can only take the group disk parameters, as the instance
10849 # has no disks yet (we are generating them right here).
10850 node = self.cfg.GetNodeInfo(pnode_name)
10851 nodegroup = self.cfg.GetNodeGroup(node.group)
10852 disks = _GenerateDiskTemplate(self,
10853 self.op.disk_template,
10854 instance, pnode_name,
10857 self.instance_file_storage_dir,
10858 self.op.file_driver,
10861 self.cfg.GetGroupDiskParams(nodegroup))
10863 iobj = objects.Instance(name=instance, os=self.op.os_type,
10864 primary_node=pnode_name,
10865 nics=self.nics, disks=disks,
10866 disk_template=self.op.disk_template,
10867 admin_state=constants.ADMINST_DOWN,
10868 network_port=network_port,
10869 beparams=self.op.beparams,
10870 hvparams=self.op.hvparams,
10871 hypervisor=self.op.hypervisor,
10872 osparams=self.op.osparams,
10876 for tag in self.op.tags:
10879 if self.adopt_disks:
10880 if self.op.disk_template == constants.DT_PLAIN:
10881 # rename LVs to the newly-generated names; we need to construct
10882 # 'fake' LV disks with the old data, plus the new unique_id
10883 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10885 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10886 rename_to.append(t_dsk.logical_id)
10887 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10888 self.cfg.SetDiskID(t_dsk, pnode_name)
10889 result = self.rpc.call_blockdev_rename(pnode_name,
10890 zip(tmp_disks, rename_to))
10891         result.Raise("Failed to rename adopted LVs")
10893 feedback_fn("* creating instance disks...")
10895 _CreateDisks(self, iobj)
10896 except errors.OpExecError:
10897 self.LogWarning("Device creation failed, reverting...")
10899 _RemoveDisks(self, iobj)
10901 self.cfg.ReleaseDRBDMinors(instance)
10904 feedback_fn("adding instance %s to cluster config" % instance)
10906 self.cfg.AddInstance(iobj, self.proc.GetECId())
10908 # Declare that we don't want to remove the instance lock anymore, as we've
10909 # added the instance to the config
10910 del self.remove_locks[locking.LEVEL_INSTANCE]
10912 if self.op.mode == constants.INSTANCE_IMPORT:
10913 # Release unused nodes
10914 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10916 # Release all nodes
10917 _ReleaseLocks(self, locking.LEVEL_NODE)
10920 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10921 feedback_fn("* wiping instance disks...")
10923 _WipeDisks(self, iobj)
10924 except errors.OpExecError, err:
10925 logging.exception("Wiping disks failed")
10926 self.LogWarning("Wiping instance disks failed (%s)", err)
10930 # Something is already wrong with the disks, don't do anything else
10932 elif self.op.wait_for_sync:
10933 disk_abort = not _WaitForSync(self, iobj)
10934 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10935 # make sure the disks are not degraded (still sync-ing is ok)
10936 feedback_fn("* checking mirrors status")
10937 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10942 _RemoveDisks(self, iobj)
10943 self.cfg.RemoveInstance(iobj.name)
10944 # Make sure the instance lock gets removed
10945 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10946 raise errors.OpExecError("There are some degraded disks for"
10949 # Release all node resource locks
10950 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10952 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10953 # we need to set the disks ID to the primary node, since the
10954       # preceding code might or might not have done it, depending on
10955 # disk template and other options
10956 for disk in iobj.disks:
10957 self.cfg.SetDiskID(disk, pnode_name)
10958 if self.op.mode == constants.INSTANCE_CREATE:
10959 if not self.op.no_install:
10960 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10961 not self.op.wait_for_sync)
10963 feedback_fn("* pausing disk sync to install instance OS")
10964 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10967 for idx, success in enumerate(result.payload):
10969 logging.warn("pause-sync of instance %s for disk %d failed",
10972 feedback_fn("* running the instance OS create scripts...")
10973 # FIXME: pass debug option from opcode to backend
10975 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10976 self.op.debug_level)
10978 feedback_fn("* resuming disk sync")
10979 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10982 for idx, success in enumerate(result.payload):
10984 logging.warn("resume-sync of instance %s for disk %d failed",
10987 os_add_result.Raise("Could not add os for instance %s"
10988 " on node %s" % (instance, pnode_name))
10991 if self.op.mode == constants.INSTANCE_IMPORT:
10992 feedback_fn("* running the instance OS import scripts...")
10996 for idx, image in enumerate(self.src_images):
11000 # FIXME: pass debug option from opcode to backend
11001 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11002 constants.IEIO_FILE, (image, ),
11003 constants.IEIO_SCRIPT,
11004 (iobj.disks[idx], idx),
11006 transfers.append(dt)
11009 masterd.instance.TransferInstanceData(self, feedback_fn,
11010 self.op.src_node, pnode_name,
11011 self.pnode.secondary_ip,
11013 if not compat.all(import_result):
11014 self.LogWarning("Some disks for instance %s on node %s were not"
11015 " imported successfully" % (instance, pnode_name))
11017 rename_from = self._old_instance_name
11019 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11020 feedback_fn("* preparing remote import...")
11021 # The source cluster will stop the instance before attempting to make
11022 # a connection. In some cases stopping an instance can take a long
11023 # time, hence the shutdown timeout is added to the connection
11025 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11026 self.op.source_shutdown_timeout)
11027 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11029 assert iobj.primary_node == self.pnode.name
11031 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11032 self.source_x509_ca,
11033 self._cds, timeouts)
11034 if not compat.all(disk_results):
11035 # TODO: Should the instance still be started, even if some disks
11036 # failed to import (valid for local imports, too)?
11037 self.LogWarning("Some disks for instance %s on node %s were not"
11038 " imported successfully" % (instance, pnode_name))
11040 rename_from = self.source_instance_name
11043 # also checked in the prereq part
11044 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11047 # Run rename script on newly imported instance
11048 assert iobj.name == instance
11049 feedback_fn("Running rename script for %s" % instance)
11050 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11052 self.op.debug_level)
11053 if result.fail_msg:
11054 self.LogWarning("Failed to run rename script for %s on node"
11055 " %s: %s" % (instance, pnode_name, result.fail_msg))
11057 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11060 iobj.admin_state = constants.ADMINST_UP
11061 self.cfg.Update(iobj, feedback_fn)
11062 logging.info("Starting instance %s on node %s", instance, pnode_name)
11063 feedback_fn("* starting instance...")
11064 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11066 result.Raise("Could not start instance")
11068 return list(iobj.all_nodes)
11071 class LUInstanceMultiAlloc(NoHooksLU):
11072 """Allocates multiple instances at the same time.
11077 def CheckArguments(self):
11078 """Check arguments.
11082 for inst in self.op.instances:
11083 if inst.iallocator is not None:
11084 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11085 " instance objects", errors.ECODE_INVAL)
11086 nodes.append(bool(inst.pnode))
11087 if inst.disk_template in constants.DTS_INT_MIRROR:
11088 nodes.append(bool(inst.snode))
11090 has_nodes = compat.any(nodes)
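# "nodes" holds one boolean per instance (whether that instance specified
# its own pnode/snode).  all(nodes) ^ any(nodes) is only true for a mixed
# list, e.g. [True, False]: all() is False while any() is True, so a
# partially node-specified request is rejected below, while all-True and
# all-False requests pass this check.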
11091 if compat.all(nodes) ^ has_nodes:
11092 raise errors.OpPrereqError("There are instance objects providing"
11093 " pnode/snode while others do not",
11094 errors.ECODE_INVAL)
11096 if self.op.iallocator is None:
11097 default_iallocator = self.cfg.GetDefaultIAllocator()
11098 if default_iallocator and has_nodes:
11099 self.op.iallocator = default_iallocator
11101 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11102 " given and no cluster-wide default"
11103 " iallocator found; please specify either"
11104 " an iallocator or nodes on the instances"
11105 " or set a cluster-wide default iallocator",
11106 errors.ECODE_INVAL)
11108 _CheckOpportunisticLocking(self.op)
11110 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11112 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11113 utils.CommaJoin(dups), errors.ECODE_INVAL)
11115 def ExpandNames(self):
11116 """Calculate the locks.
11119 self.share_locks = _ShareAll()
11120 self.needed_locks = {
11121 # The iallocator will select the nodes; even if no iallocator is used,
11122 # collisions with LUInstanceCreate should still be avoided
11123 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11126 if self.op.iallocator:
11127 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11128 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11130 if self.op.opportunistic_locking:
11131 self.opportunistic_locks[locking.LEVEL_NODE] = True
11132 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11135 for inst in self.op.instances:
11136 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11137 nodeslist.append(inst.pnode)
11138 if inst.snode is not None:
11139 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11140 nodeslist.append(inst.snode)
11142 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11143 # Lock resources of instance's primary and secondary nodes (copy to
11144 # prevent accidental modification)
11145 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11147 def CheckPrereq(self):
11148 """Check prerequisite.
11151 cluster = self.cfg.GetClusterInfo()
11152 default_vg = self.cfg.GetVGName()
11153 ec_id = self.proc.GetECId()
11155 if self.op.opportunistic_locking:
11156 # Only consider nodes for which a lock is held
11157 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
11159 node_whitelist = None
11161 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11162 _ComputeNics(op, cluster, None,
11164 _ComputeFullBeParams(op, cluster),
11166 for op in self.op.instances]
11168 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11169 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11171 ial.Run(self.op.iallocator)
11173 if not ial.success:
11174 raise errors.OpPrereqError("Can't compute nodes using"
11175 " iallocator '%s': %s" %
11176 (self.op.iallocator, ial.info),
11177 errors.ECODE_NORES)
11179 self.ia_result = ial.result
11181 if self.op.dry_run:
11182 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11183 constants.JOB_IDS_KEY: [],
11186 def _ConstructPartialResult(self):
11187 """Contructs the partial result.
11190 (allocatable, failed) = self.ia_result
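# "allocatable" is a list of (instance name, nodes) pairs as returned by
# the iallocator; only the instance names (the first element of each pair,
# extracted via compat.fst) are reported back, while "failed" is passed
# through unchanged.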
11192 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11193 map(compat.fst, allocatable),
11194 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11197 def Exec(self, feedback_fn):
11198 """Executes the opcode.
11201 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11202 (allocatable, failed) = self.ia_result
11205 for (name, nodes) in allocatable:
11206 op = op2inst.pop(name)
11209 (op.pnode, op.snode) = nodes
11211 (op.pnode,) = nodes
11215 missing = set(op2inst.keys()) - set(failed)
11216 assert not missing, \
11217 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11219 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11222 def _CheckRADOSFreeSpace():
11223 """Compute disk size requirements inside the RADOS cluster.
11226 # For the RADOS cluster we assume there is always enough space.
11230 class LUInstanceConsole(NoHooksLU):
11231 """Connect to an instance's console.
11233 This is somewhat special in that it returns the command line that
11234 you need to run on the master node in order to connect to the console.
11240 def ExpandNames(self):
11241 self.share_locks = _ShareAll()
11242 self._ExpandAndLockInstance()
11244 def CheckPrereq(self):
11245 """Check prerequisites.
11247 This checks that the instance is in the cluster.
11250 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11251 assert self.instance is not None, \
11252 "Cannot retrieve locked instance %s" % self.op.instance_name
11253 _CheckNodeOnline(self, self.instance.primary_node)
11255 def Exec(self, feedback_fn):
11256 """Connect to the console of an instance
11259 instance = self.instance
11260 node = instance.primary_node
11262 node_insts = self.rpc.call_instance_list([node],
11263 [instance.hypervisor])[node]
11264 node_insts.Raise("Can't get node information from %s" % node)
11266 if instance.name not in node_insts.payload:
11267 if instance.admin_state == constants.ADMINST_UP:
11268 state = constants.INSTST_ERRORDOWN
11269 elif instance.admin_state == constants.ADMINST_DOWN:
11270 state = constants.INSTST_ADMINDOWN
11272 state = constants.INSTST_ADMINOFFLINE
11273 raise errors.OpExecError("Instance %s is not running (state %s)" %
11274 (instance.name, state))
11276 logging.debug("Connecting to console of %s on %s", instance.name, node)
11278 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11281 def _GetInstanceConsole(cluster, instance):
11282 """Returns console information for an instance.
11284 @type cluster: L{objects.Cluster}
11285 @type instance: L{objects.Instance}
11289 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11290 # beparams and hvparams are passed separately, to avoid editing the
11291 # instance and then saving the defaults in the instance itself.
11292 hvparams = cluster.FillHV(instance)
11293 beparams = cluster.FillBE(instance)
11294 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11296 assert console.instance == instance.name
11297 assert console.Validate()
11299 return console.ToDict()
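# The console object is serialized to a plain dictionary before being
# returned, after the sanity checks above on console.instance and
# Validate().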
11302 class LUInstanceReplaceDisks(LogicalUnit):
11303 """Replace the disks of an instance.
11306 HPATH = "mirrors-replace"
11307 HTYPE = constants.HTYPE_INSTANCE
11310 def CheckArguments(self):
11311 """Check arguments.
11314 remote_node = self.op.remote_node
11315 ialloc = self.op.iallocator
11316 if self.op.mode == constants.REPLACE_DISK_CHG:
11317 if remote_node is None and ialloc is None:
11318 raise errors.OpPrereqError("When changing the secondary either an"
11319 " iallocator script must be used or the"
11320 " new node given", errors.ECODE_INVAL)
11322 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11324 elif remote_node is not None or ialloc is not None:
11325 # Not replacing the secondary
11326 raise errors.OpPrereqError("The iallocator and new node options can"
11327 " only be used when changing the"
11328 " secondary node", errors.ECODE_INVAL)
11330 def ExpandNames(self):
11331 self._ExpandAndLockInstance()
11333 assert locking.LEVEL_NODE not in self.needed_locks
11334 assert locking.LEVEL_NODE_RES not in self.needed_locks
11335 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11337 assert self.op.iallocator is None or self.op.remote_node is None, \
11338 "Conflicting options"
11340 if self.op.remote_node is not None:
11341 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11343 # Warning: do not remove the locking of the new secondary here
11344 # unless DRBD8.AddChildren is changed to work in parallel;
11345 # currently it doesn't since parallel invocations of
11346 # FindUnusedMinor will conflict
11347 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11350 self.needed_locks[locking.LEVEL_NODE] = []
11351 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11353 if self.op.iallocator is not None:
11354 # iallocator will select a new node in the same group
11355 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11356 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11358 self.needed_locks[locking.LEVEL_NODE_RES] = []
11360 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11361 self.op.iallocator, self.op.remote_node,
11362 self.op.disks, self.op.early_release,
11363 self.op.ignore_ipolicy)
11365 self.tasklets = [self.replacer]
11367 def DeclareLocks(self, level):
11368 if level == locking.LEVEL_NODEGROUP:
11369 assert self.op.remote_node is None
11370 assert self.op.iallocator is not None
11371 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11373 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11374 # Lock all groups used by instance optimistically; this requires going
11375 # via the node before it's locked, requiring verification later on
11376 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11377 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11379 elif level == locking.LEVEL_NODE:
11380 if self.op.iallocator is not None:
11381 assert self.op.remote_node is None
11382 assert not self.needed_locks[locking.LEVEL_NODE]
11383 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11385 # Lock member nodes of all locked groups
11386 self.needed_locks[locking.LEVEL_NODE] = \
11388 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11389 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11391 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11393 self._LockInstancesNodes()
11395 elif level == locking.LEVEL_NODE_RES:
11397 self.needed_locks[locking.LEVEL_NODE_RES] = \
11398 self.needed_locks[locking.LEVEL_NODE]
11400 def BuildHooksEnv(self):
11401 """Build hooks env.
11403 This runs on the master, the primary and all the secondaries.
11406 instance = self.replacer.instance
11408 "MODE": self.op.mode,
11409 "NEW_SECONDARY": self.op.remote_node,
11410 "OLD_SECONDARY": instance.secondary_nodes[0],
11412 env.update(_BuildInstanceHookEnvByObject(self, instance))
11415 def BuildHooksNodes(self):
11416 """Build hooks nodes.
11419 instance = self.replacer.instance
11421 self.cfg.GetMasterNode(),
11422 instance.primary_node,
11424 if self.op.remote_node is not None:
11425 nl.append(self.op.remote_node)
11428 def CheckPrereq(self):
11429 """Check prerequisites.
11432 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11433 self.op.iallocator is None)
11435 # Verify if node group locks are still correct
11436 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11438 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11440 return LogicalUnit.CheckPrereq(self)
11443 class TLReplaceDisks(Tasklet):
11444 """Replaces disks for an instance.
11446 Note: Locking is not within the scope of this class.
11449 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11450 disks, early_release, ignore_ipolicy):
11451 """Initializes this class.
11454 Tasklet.__init__(self, lu)
11457 self.instance_name = instance_name
11459 self.iallocator_name = iallocator_name
11460 self.remote_node = remote_node
11462 self.early_release = early_release
11463 self.ignore_ipolicy = ignore_ipolicy
11466 self.instance = None
11467 self.new_node = None
11468 self.target_node = None
11469 self.other_node = None
11470 self.remote_node_info = None
11471 self.node_secondary_ip = None
11474 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11475 """Compute a new secondary node using an IAllocator.
11478 req = iallocator.IAReqRelocate(name=instance_name,
11479 relocate_from=list(relocate_from))
11480 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11482 ial.Run(iallocator_name)
11484 if not ial.success:
11485 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11486 " %s" % (iallocator_name, ial.info),
11487 errors.ECODE_NORES)
11489 remote_node_name = ial.result[0]
11491 lu.LogInfo("Selected new secondary for instance '%s': %s",
11492 instance_name, remote_node_name)
11494 return remote_node_name
11496 def _FindFaultyDisks(self, node_name):
11497 """Wrapper for L{_FindFaultyInstanceDisks}.
11500 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11503 def _CheckDisksActivated(self, instance):
11504 """Checks if the instance disks are activated.
11506 @param instance: The instance to check disks
11507 @return: True if they are activated, False otherwise
11510 nodes = instance.all_nodes
11512 for idx, dev in enumerate(instance.disks):
11514 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11515 self.cfg.SetDiskID(dev, node)
11517 result = _BlockdevFind(self, node, dev, instance)
11521 elif result.fail_msg or not result.payload:
11526 def CheckPrereq(self):
11527 """Check prerequisites.
11529 This checks that the instance is in the cluster.
11532 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11533 assert instance is not None, \
11534 "Cannot retrieve locked instance %s" % self.instance_name
11536 if instance.disk_template != constants.DT_DRBD8:
11537 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11538 " instances", errors.ECODE_INVAL)
11540 if len(instance.secondary_nodes) != 1:
11541 raise errors.OpPrereqError("The instance has a strange layout,"
11542 " expected one secondary but found %d" %
11543 len(instance.secondary_nodes),
11544 errors.ECODE_FAULT)
11546 instance = self.instance
11547 secondary_node = instance.secondary_nodes[0]
11549 if self.iallocator_name is None:
11550 remote_node = self.remote_node
11552 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11553 instance.name, instance.secondary_nodes)
11555 if remote_node is None:
11556 self.remote_node_info = None
11558 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11559 "Remote node '%s' is not locked" % remote_node
11561 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11562 assert self.remote_node_info is not None, \
11563 "Cannot retrieve locked node %s" % remote_node
11565 if remote_node == self.instance.primary_node:
11566 raise errors.OpPrereqError("The specified node is the primary node of"
11567 " the instance", errors.ECODE_INVAL)
11569 if remote_node == secondary_node:
11570 raise errors.OpPrereqError("The specified node is already the"
11571 " secondary node of the instance",
11572 errors.ECODE_INVAL)
11574 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11575 constants.REPLACE_DISK_CHG):
11576 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11577 errors.ECODE_INVAL)
11579 if self.mode == constants.REPLACE_DISK_AUTO:
11580 if not self._CheckDisksActivated(instance):
11581 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11582 " first" % self.instance_name,
11583 errors.ECODE_STATE)
11584 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11585 faulty_secondary = self._FindFaultyDisks(secondary_node)
11587 if faulty_primary and faulty_secondary:
11588 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11589 " one node and can not be repaired"
11590 " automatically" % self.instance_name,
11591 errors.ECODE_STATE)
11594 self.disks = faulty_primary
11595 self.target_node = instance.primary_node
11596 self.other_node = secondary_node
11597 check_nodes = [self.target_node, self.other_node]
11598 elif faulty_secondary:
11599 self.disks = faulty_secondary
11600 self.target_node = secondary_node
11601 self.other_node = instance.primary_node
11602 check_nodes = [self.target_node, self.other_node]
11608 # Non-automatic modes
11609 if self.mode == constants.REPLACE_DISK_PRI:
11610 self.target_node = instance.primary_node
11611 self.other_node = secondary_node
11612 check_nodes = [self.target_node, self.other_node]
11614 elif self.mode == constants.REPLACE_DISK_SEC:
11615 self.target_node = secondary_node
11616 self.other_node = instance.primary_node
11617 check_nodes = [self.target_node, self.other_node]
11619 elif self.mode == constants.REPLACE_DISK_CHG:
11620 self.new_node = remote_node
11621 self.other_node = instance.primary_node
11622 self.target_node = secondary_node
11623 check_nodes = [self.new_node, self.other_node]
11625 _CheckNodeNotDrained(self.lu, remote_node)
11626 _CheckNodeVmCapable(self.lu, remote_node)
11628 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11629 assert old_node_info is not None
11630 if old_node_info.offline and not self.early_release:
11631 # doesn't make sense to delay the release
11632 self.early_release = True
11633 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11634 " early-release mode", secondary_node)
11637 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11640 # If not specified all disks should be replaced
11642 self.disks = range(len(self.instance.disks))
11644 # TODO: This is ugly, but right now we can't distinguish between internally
11645 # submitted opcodes and external ones. We should fix that.
11646 if self.remote_node_info:
11647 # We change the node, lets verify it still meets instance policy
11648 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11649 cluster = self.cfg.GetClusterInfo()
11650 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11652 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11653 ignore=self.ignore_ipolicy)
11655 for node in check_nodes:
11656 _CheckNodeOnline(self.lu, node)
11658 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11661 if node_name is not None)
11663 # Release unneeded node and node resource locks
11664 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11665 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11666 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11668 # Release any owned node group
11669 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11671 # Check whether disks are valid
11672 for disk_idx in self.disks:
11673 instance.FindDisk(disk_idx)
11675 # Get secondary node IP addresses
11676 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11677 in self.cfg.GetMultiNodeInfo(touched_nodes))
11679 def Exec(self, feedback_fn):
11680 """Execute disk replacement.
11682 This dispatches the disk replacement to the appropriate handler.
11686 # Verify owned locks before starting operation
11687 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11688 assert set(owned_nodes) == set(self.node_secondary_ip), \
11689 ("Incorrect node locks, owning %s, expected %s" %
11690 (owned_nodes, self.node_secondary_ip.keys()))
11691 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11692 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11693 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11695 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11696 assert list(owned_instances) == [self.instance_name], \
11697 "Instance '%s' not locked" % self.instance_name
11699 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11700 "Should not own any node group lock at this point"
11703 feedback_fn("No disks need replacement for instance '%s'" %
11704 self.instance.name)
11707 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11708 (utils.CommaJoin(self.disks), self.instance.name))
11709 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11710 feedback_fn("Current seconary node: %s" %
11711 utils.CommaJoin(self.instance.secondary_nodes))
11713 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11715 # Activate the instance disks if we're replacing them on a down instance
11717 _StartInstanceDisks(self.lu, self.instance, True)
11720 # Should we replace the secondary node?
11721 if self.new_node is not None:
11722 fn = self._ExecDrbd8Secondary
11724 fn = self._ExecDrbd8DiskOnly
11726 result = fn(feedback_fn)
11728 # Deactivate the instance disks if we're replacing them on a
11731 _SafeShutdownInstanceDisks(self.lu, self.instance)
11733 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11736 # Verify owned locks
11737 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11738 nodes = frozenset(self.node_secondary_ip)
11739 assert ((self.early_release and not owned_nodes) or
11740 (not self.early_release and not (set(owned_nodes) - nodes))), \
11741 ("Not owning the correct locks, early_release=%s, owned=%r,"
11742 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11746 def _CheckVolumeGroup(self, nodes):
11747 self.lu.LogInfo("Checking volume groups")
11749 vgname = self.cfg.GetVGName()
11751 # Make sure volume group exists on all involved nodes
11752 results = self.rpc.call_vg_list(nodes)
11754 raise errors.OpExecError("Can't list volume groups on the nodes")
11757 res = results[node]
11758 res.Raise("Error checking node %s" % node)
11759 if vgname not in res.payload:
11760 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11763 def _CheckDisksExistence(self, nodes):
11764 # Check disk existence
11765 for idx, dev in enumerate(self.instance.disks):
11766 if idx not in self.disks:
11770 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11771 self.cfg.SetDiskID(dev, node)
11773 result = _BlockdevFind(self, node, dev, self.instance)
11775 msg = result.fail_msg
11776 if msg or not result.payload:
11778 msg = "disk not found"
11779 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11782 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11783 for idx, dev in enumerate(self.instance.disks):
11784 if idx not in self.disks:
11787 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11790 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11791 on_primary, ldisk=ldisk):
11792 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11793 " replace disks for instance %s" %
11794 (node_name, self.instance.name))
11796 def _CreateNewStorage(self, node_name):
11797 """Create new storage on the primary or secondary node.
11799 This is only used for same-node replaces, not for changing the
11800 secondary node, hence we don't want to modify the existing disk.
11805 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11806 for idx, dev in enumerate(disks):
11807 if idx not in self.disks:
11810 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11812 self.cfg.SetDiskID(dev, node_name)
11814 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11815 names = _GenerateUniqueNames(self.lu, lv_names)
11817 (data_disk, meta_disk) = dev.children
11818 vg_data = data_disk.logical_id[0]
11819 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11820 logical_id=(vg_data, names[0]),
11821 params=data_disk.params)
11822 vg_meta = meta_disk.logical_id[0]
11823 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11824 size=constants.DRBD_META_SIZE,
11825 logical_id=(vg_meta, names[1]),
11826 params=meta_disk.params)
11828 new_lvs = [lv_data, lv_meta]
11829 old_lvs = [child.Copy() for child in dev.children]
11830 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11831 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11833 # we pass force_create=True to force the LVM creation
11834 for new_lv in new_lvs:
11835 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11836 _GetInstanceInfoText(self.instance), False,
11841 def _CheckDevices(self, node_name, iv_names):
11842 for name, (dev, _, _) in iv_names.iteritems():
11843 self.cfg.SetDiskID(dev, node_name)
11845 result = _BlockdevFind(self, node_name, dev, self.instance)
11847 msg = result.fail_msg
11848 if msg or not result.payload:
11850 msg = "disk not found"
11851 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11854 if result.payload.is_degraded:
11855 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11857 def _RemoveOldStorage(self, node_name, iv_names):
11858 for name, (_, old_lvs, _) in iv_names.iteritems():
11859 self.lu.LogInfo("Remove logical volumes for %s", name)
11862 self.cfg.SetDiskID(lv, node_name)
11864 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11866 self.lu.LogWarning("Can't remove old LV: %s", msg,
11867 hint="remove unused LVs manually")
11869 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11870 """Replace a disk on the primary or secondary for DRBD 8.
11872 The algorithm for replace is quite complicated:
11874 1. for each disk to be replaced:
11876 1. create new LVs on the target node with unique names
11877 1. detach old LVs from the drbd device
11878 1. rename old LVs to name_replaced.<time_t>
11879 1. rename new LVs to old LVs
11880 1. attach the new LVs (with the old names now) to the drbd device
11882 1. wait for sync across all devices
11884 1. for each modified disk:
11886 1. remove old LVs (which have the name name_replaced.<time_t>)
11888 Failures are not very well handled.
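# As a concrete illustration of the rename dance above (LV names here are
# made up): for disk/0 the new data LV might be created as
# "xenvg/<new-uuid>.disk0_data"; the old "xenvg/<old-uuid>.disk0_data" is
# renamed to "xenvg/<old-uuid>.disk0_data_replaced-<time_t>", the new LV
# is then renamed to the old name and re-attached to the DRBD device, and
# the "_replaced-*" LVs are deleted in the "remove old storage" step.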
11893 # Step: check device activation
11894 self.lu.LogStep(1, steps_total, "Check device existence")
11895 self._CheckDisksExistence([self.other_node, self.target_node])
11896 self._CheckVolumeGroup([self.target_node, self.other_node])
11898 # Step: check other node consistency
11899 self.lu.LogStep(2, steps_total, "Check peer consistency")
11900 self._CheckDisksConsistency(self.other_node,
11901 self.other_node == self.instance.primary_node,
11904 # Step: create new storage
11905 self.lu.LogStep(3, steps_total, "Allocate new storage")
11906 iv_names = self._CreateNewStorage(self.target_node)
11908 # Step: for each lv, detach+rename*2+attach
11909 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11910 for dev, old_lvs, new_lvs in iv_names.itervalues():
11911 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11913 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11915 result.Raise("Can't detach drbd from local storage on node"
11916 " %s for device %s" % (self.target_node, dev.iv_name))
11918 #cfg.Update(instance)
11920 # ok, we created the new LVs, so now we know we have the needed
11921 # storage; as such, we proceed on the target node to rename
11922 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11923 # using the assumption that logical_id == physical_id (which in
11924 # turn is the unique_id on that node)
11926 # FIXME(iustin): use a better name for the replaced LVs
11927 temp_suffix = int(time.time())
11928 ren_fn = lambda d, suff: (d.physical_id[0],
11929 d.physical_id[1] + "_replaced-%s" % suff)
11931 # Build the rename list based on what LVs exist on the node
11932 rename_old_to_new = []
11933 for to_ren in old_lvs:
11934 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11935 if not result.fail_msg and result.payload:
11937 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11939 self.lu.LogInfo("Renaming the old LVs on the target node")
11940 result = self.rpc.call_blockdev_rename(self.target_node,
11942 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11944 # Now we rename the new LVs to the old LVs
11945 self.lu.LogInfo("Renaming the new LVs on the target node")
11946 rename_new_to_old = [(new, old.physical_id)
11947 for old, new in zip(old_lvs, new_lvs)]
11948 result = self.rpc.call_blockdev_rename(self.target_node,
11950 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11952 # Intermediate steps of in memory modifications
11953 for old, new in zip(old_lvs, new_lvs):
11954 new.logical_id = old.logical_id
11955 self.cfg.SetDiskID(new, self.target_node)
11957 # We need to modify old_lvs so that removal later removes the
11958 # right LVs, not the newly added ones; note that old_lvs is a
11960 for disk in old_lvs:
11961 disk.logical_id = ren_fn(disk, temp_suffix)
11962 self.cfg.SetDiskID(disk, self.target_node)
11964 # Now that the new lvs have the old name, we can add them to the device
11965 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11966 result = self.rpc.call_blockdev_addchildren(self.target_node,
11967 (dev, self.instance), new_lvs)
11968 msg = result.fail_msg
11970 for new_lv in new_lvs:
11971 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11974 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11975 hint=("cleanup manually the unused logical"
11977 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11979 cstep = itertools.count(5)
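# Steps 1-4 have been logged above; the numbering of the remaining steps
# depends on whether early_release is set, hence the running counter
# starting at 5.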
11981 if self.early_release:
11982 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11983 self._RemoveOldStorage(self.target_node, iv_names)
11984 # TODO: Check if releasing locks early still makes sense
11985 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11987 # Release all resource locks except those used by the instance
11988 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11989 keep=self.node_secondary_ip.keys())
11991 # Release all node locks while waiting for sync
11992 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11994 # TODO: Can the instance lock be downgraded here? Take the optional disk
11995 # shutdown in the caller into consideration.
11998 # This can fail as the old devices are degraded and _WaitForSync
11999 # does a combined result over all disks, so we don't check its return value
12000 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12001 _WaitForSync(self.lu, self.instance)
12003 # Check all devices manually
12004 self._CheckDevices(self.instance.primary_node, iv_names)
12006 # Step: remove old storage
12007 if not self.early_release:
12008 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12009 self._RemoveOldStorage(self.target_node, iv_names)
12011 def _ExecDrbd8Secondary(self, feedback_fn):
12012 """Replace the secondary node for DRBD 8.
12014 The algorithm for replace is quite complicated:
12015 - for all disks of the instance:
12016 - create new LVs on the new node with same names
12017 - shutdown the drbd device on the old secondary
12018 - disconnect the drbd network on the primary
12019 - create the drbd device on the new secondary
12020 - network attach the drbd on the primary, using an artifice:
12021 the drbd code for Attach() will connect to the network if it
12022 finds a device which is connected to the good local disks but
12023 not network enabled
12024 - wait for sync across all devices
12025 - remove all disks from the old secondary
12027 Failures are not very well handled.
12032 pnode = self.instance.primary_node
12034 # Step: check device activation
12035 self.lu.LogStep(1, steps_total, "Check device existence")
12036 self._CheckDisksExistence([self.instance.primary_node])
12037 self._CheckVolumeGroup([self.instance.primary_node])
12039 # Step: check other node consistency
12040 self.lu.LogStep(2, steps_total, "Check peer consistency")
12041 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12043 # Step: create new storage
12044 self.lu.LogStep(3, steps_total, "Allocate new storage")
12045 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12046 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12047 for idx, dev in enumerate(disks):
12048 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12049 (self.new_node, idx))
12050 # we pass force_create=True to force LVM creation
12051 for new_lv in dev.children:
12052 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12053 True, _GetInstanceInfoText(self.instance), False,
12056 # Step 4: drbd minors and drbd setup changes
12057 # after this, we must manually remove the drbd minors on both the
12058 # error and the success paths
12059 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12060 minors = self.cfg.AllocateDRBDMinor([self.new_node
12061 for dev in self.instance.disks],
12062 self.instance.name)
12063 logging.debug("Allocated minors %r", minors)
12066 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12067 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12068 (self.new_node, idx))
12069 # create new devices on new_node; note that we create two IDs:
12070 # one without port, so the drbd will be activated without
12071 # networking information on the new node at this stage, and one
12072 # with network, for the latter activation in step 4
12073 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12074 if self.instance.primary_node == o_node1:
12077 assert self.instance.primary_node == o_node2, "Three-node instance?"
12080 new_alone_id = (self.instance.primary_node, self.new_node, None,
12081 p_minor, new_minor, o_secret)
12082 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12083 p_minor, new_minor, o_secret)
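# With made-up values o_port=11000, p_minor=0, new_minor=3 and
# o_secret="s3cr3t", the tuples built above would be
# new_alone_id == (pnode, new_node, None, 0, 3, "s3cr3t") (no port, so the
# device is brought up without networking) and
# new_net_id == (pnode, new_node, 11000, 0, 3, "s3cr3t") for the later
# network attach.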
12085 iv_names[idx] = (dev, dev.children, new_net_id)
12086 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12088 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12089 logical_id=new_alone_id,
12090 children=dev.children,
12093 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12096 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12098 _GetInstanceInfoText(self.instance), False,
12100 except errors.GenericError:
12101 self.cfg.ReleaseDRBDMinors(self.instance.name)
12104 # We have new devices, shutdown the drbd on the old secondary
12105 for idx, dev in enumerate(self.instance.disks):
12106 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12107 self.cfg.SetDiskID(dev, self.target_node)
12108 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12109 (dev, self.instance)).fail_msg
12111 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12112 "node: %s" % (idx, msg),
12113 hint=("Please cleanup this device manually as"
12114 " soon as possible"))
12116 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12117 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12118 self.instance.disks)[pnode]
12120 msg = result.fail_msg
12122 # detaches didn't succeed (unlikely)
12123 self.cfg.ReleaseDRBDMinors(self.instance.name)
12124 raise errors.OpExecError("Can't detach the disks from the network on"
12125 " old node: %s" % (msg,))
12127 # if we managed to detach at least one, we update all the disks of
12128 # the instance to point to the new secondary
12129 self.lu.LogInfo("Updating instance configuration")
12130 for dev, _, new_logical_id in iv_names.itervalues():
12131 dev.logical_id = new_logical_id
12132 self.cfg.SetDiskID(dev, self.instance.primary_node)
12134 self.cfg.Update(self.instance, feedback_fn)
12136 # Release all node locks (the configuration has been updated)
12137 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12139 # and now perform the drbd attach
12140 self.lu.LogInfo("Attaching primary drbds to new secondary"
12141 " (standalone => connected)")
12142 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12144 self.node_secondary_ip,
12145 (self.instance.disks, self.instance),
12146 self.instance.name,
12148 for to_node, to_result in result.items():
12149 msg = to_result.fail_msg
12151 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12153 hint=("please do a gnt-instance info to see the"
12154 " status of disks"))
12156 cstep = itertools.count(5)
12158 if self.early_release:
12159 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12160 self._RemoveOldStorage(self.target_node, iv_names)
12161 # TODO: Check if releasing locks early still makes sense
12162 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12164 # Release all resource locks except those used by the instance
12165 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12166 keep=self.node_secondary_ip.keys())
12168 # TODO: Can the instance lock be downgraded here? Take the optional disk
12169 # shutdown in the caller into consideration.
12172 # This can fail as the old devices are degraded and _WaitForSync
12173 # does a combined result over all disks, so we don't check its return value
12174 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12175 _WaitForSync(self.lu, self.instance)
12177 # Check all devices manually
12178 self._CheckDevices(self.instance.primary_node, iv_names)
12180 # Step: remove old storage
12181 if not self.early_release:
12182 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12183 self._RemoveOldStorage(self.target_node, iv_names)
12186 class LURepairNodeStorage(NoHooksLU):
12187 """Repairs the volume group on a node.
12192 def CheckArguments(self):
12193 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12195 storage_type = self.op.storage_type
12197 if (constants.SO_FIX_CONSISTENCY not in
12198 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12199 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12200 " repaired" % storage_type,
12201 errors.ECODE_INVAL)
12203 def ExpandNames(self):
12204 self.needed_locks = {
12205 locking.LEVEL_NODE: [self.op.node_name],
12208 def _CheckFaultyDisks(self, instance, node_name):
12209 """Ensure faulty disks abort the opcode or at least warn."""
12211 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12213 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12214 " node '%s'" % (instance.name, node_name),
12215 errors.ECODE_STATE)
12216 except errors.OpPrereqError, err:
12217 if self.op.ignore_consistency:
12218 self.LogWarning(str(err.args[0]))
12222 def CheckPrereq(self):
12223 """Check prerequisites.
12226 # Check whether any instance on this node has faulty disks
12227 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12228 if inst.admin_state != constants.ADMINST_UP:
12230 check_nodes = set(inst.all_nodes)
12231 check_nodes.discard(self.op.node_name)
12232 for inst_node_name in check_nodes:
12233 self._CheckFaultyDisks(inst, inst_node_name)
12235 def Exec(self, feedback_fn):
12236 feedback_fn("Repairing storage unit '%s' on %s ..." %
12237 (self.op.name, self.op.node_name))
12239 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12240 result = self.rpc.call_storage_execute(self.op.node_name,
12241 self.op.storage_type, st_args,
12243 constants.SO_FIX_CONSISTENCY)
12244 result.Raise("Failed to repair storage unit '%s' on %s" %
12245 (self.op.name, self.op.node_name))
12248 class LUNodeEvacuate(NoHooksLU):
12249 """Evacuates instances off a list of nodes.
12254 _MODE2IALLOCATOR = {
12255 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12256 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12257 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12259 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12260 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12261 constants.IALLOCATOR_NEVAC_MODES)
12263 def CheckArguments(self):
12264 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12266 def ExpandNames(self):
12267 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12269 if self.op.remote_node is not None:
12270 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12271 assert self.op.remote_node
12273 if self.op.remote_node == self.op.node_name:
12274 raise errors.OpPrereqError("Can not use evacuated node as a new"
12275 " secondary node", errors.ECODE_INVAL)
12277 if self.op.mode != constants.NODE_EVAC_SEC:
12278 raise errors.OpPrereqError("Without the use of an iallocator only"
12279 " secondary instances can be evacuated",
12280 errors.ECODE_INVAL)
12283 self.share_locks = _ShareAll()
12284 self.needed_locks = {
12285 locking.LEVEL_INSTANCE: [],
12286 locking.LEVEL_NODEGROUP: [],
12287 locking.LEVEL_NODE: [],
12290 # Determine nodes (via group) optimistically, needs verification once locks
12291 # have been acquired
12292 self.lock_nodes = self._DetermineNodes()
12294 def _DetermineNodes(self):
12295 """Gets the list of nodes to operate on.
12298 if self.op.remote_node is None:
12299 # Iallocator will choose any node(s) in the same group
12300 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12302 group_nodes = frozenset([self.op.remote_node])
12304 # Determine nodes to be locked
12305 return set([self.op.node_name]) | group_nodes
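# The evacuated node itself is always included; with an iallocator (no
# explicit remote node) all members of its node group(s) are candidate
# targets and must be locked, otherwise only the given remote node is
# added.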
12307 def _DetermineInstances(self):
12308 """Builds list of instances to operate on.
12311 assert self.op.mode in constants.NODE_EVAC_MODES
12313 if self.op.mode == constants.NODE_EVAC_PRI:
12314 # Primary instances only
12315 inst_fn = _GetNodePrimaryInstances
12316 assert self.op.remote_node is None, \
12317 "Evacuating primary instances requires iallocator"
12318 elif self.op.mode == constants.NODE_EVAC_SEC:
12319 # Secondary instances only
12320 inst_fn = _GetNodeSecondaryInstances
12323 assert self.op.mode == constants.NODE_EVAC_ALL
12324 inst_fn = _GetNodeInstances
12325 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12327 raise errors.OpPrereqError("Due to an issue with the iallocator"
12328 " interface it is not possible to evacuate"
12329 " all instances at once; specify explicitly"
12330 " whether to evacuate primary or secondary"
12332 errors.ECODE_INVAL)
12334 return inst_fn(self.cfg, self.op.node_name)
12336 def DeclareLocks(self, level):
12337 if level == locking.LEVEL_INSTANCE:
12338 # Lock instances optimistically, needs verification once node and group
12339 # locks have been acquired
12340 self.needed_locks[locking.LEVEL_INSTANCE] = \
12341 set(i.name for i in self._DetermineInstances())
12343 elif level == locking.LEVEL_NODEGROUP:
12344 # Lock node groups for all potential target nodes optimistically, needs
12345 # verification once nodes have been acquired
12346 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12347 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12349 elif level == locking.LEVEL_NODE:
12350 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12352 def CheckPrereq(self):
12354 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12355 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12356 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12358 need_nodes = self._DetermineNodes()
12360 if not owned_nodes.issuperset(need_nodes):
12361 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12362 " locks were acquired, current nodes are"
12363 " are '%s', used to be '%s'; retry the"
12365 (self.op.node_name,
12366 utils.CommaJoin(need_nodes),
12367 utils.CommaJoin(owned_nodes)),
12368 errors.ECODE_STATE)
12370 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12371 if owned_groups != wanted_groups:
12372 raise errors.OpExecError("Node groups changed since locks were acquired,"
12373 " current groups are '%s', used to be '%s';"
12374 " retry the operation" %
12375 (utils.CommaJoin(wanted_groups),
12376 utils.CommaJoin(owned_groups)))
12378 # Determine affected instances
12379 self.instances = self._DetermineInstances()
12380 self.instance_names = [i.name for i in self.instances]
12382 if set(self.instance_names) != owned_instances:
12383 raise errors.OpExecError("Instances on node '%s' changed since locks"
12384 " were acquired, current instances are '%s',"
12385 " used to be '%s'; retry the operation" %
12386 (self.op.node_name,
12387 utils.CommaJoin(self.instance_names),
12388 utils.CommaJoin(owned_instances)))
12390 if self.instance_names:
12391 self.LogInfo("Evacuating instances from node '%s': %s",
12393 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12395 self.LogInfo("No instances to evacuate from node '%s'",
12398 if self.op.remote_node is not None:
12399 for i in self.instances:
12400 if i.primary_node == self.op.remote_node:
12401 raise errors.OpPrereqError("Node %s is the primary node of"
12402 " instance %s, cannot use it as"
12404 (self.op.remote_node, i.name),
12405 errors.ECODE_INVAL)
12407 def Exec(self, feedback_fn):
12408 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12410 if not self.instance_names:
12411 # No instances to evacuate
12414 elif self.op.iallocator is not None:
12415 # TODO: Implement relocation to other group
12416 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12417 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12418 instances=list(self.instance_names))
12419 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12421 ial.Run(self.op.iallocator)
12423 if not ial.success:
12424 raise errors.OpPrereqError("Can't compute node evacuation using"
12425 " iallocator '%s': %s" %
12426 (self.op.iallocator, ial.info),
12427 errors.ECODE_NORES)
12429 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12431 elif self.op.remote_node is not None:
12432 assert self.op.mode == constants.NODE_EVAC_SEC
12434 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12435 remote_node=self.op.remote_node,
12437 mode=constants.REPLACE_DISK_CHG,
12438 early_release=self.op.early_release)]
12439 for instance_name in self.instance_names]
12442 raise errors.ProgrammerError("No iallocator or remote node")
12444 return ResultWithJobs(jobs)
12447 def _SetOpEarlyRelease(early_release, op):
12448 """Sets C{early_release} flag on opcodes if available.
12452 op.early_release = early_release
12453 except AttributeError:
12454 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12459 def _NodeEvacDest(use_nodes, group, nodes):
12460 """Returns group or nodes depending on caller's choice.
12464 return utils.CommaJoin(nodes)
12469 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12470 """Unpacks the result of change-group and node-evacuate iallocator requests.
12472 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12473 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12475 @type lu: L{LogicalUnit}
12476 @param lu: Logical unit instance
12477 @type alloc_result: tuple/list
12478 @param alloc_result: Result from iallocator
12479 @type early_release: bool
12480 @param early_release: Whether to release locks early if possible
12481 @type use_nodes: bool
12482 @param use_nodes: Whether to display node names instead of groups
12485 (moved, failed, jobs) = alloc_result
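# "moved" is a list of (instance name, target group, target nodes) tuples,
# "failed" a list of (instance name, reason) pairs, and "jobs" a list of
# job definitions, each a list of serialized opcodes that still need to be
# deserialized via opcodes.OpCode.LoadOpCode below.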
12488 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12489 for (name, reason) in failed)
12490 lu.LogWarning("Unable to evacuate instances %s", failreason)
12491 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12494 lu.LogInfo("Instances to be moved: %s",
12495 utils.CommaJoin("%s (to %s)" %
12496 (name, _NodeEvacDest(use_nodes, group, nodes))
12497 for (name, group, nodes) in moved))
12499 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12500 map(opcodes.OpCode.LoadOpCode, ops))
12504 def _DiskSizeInBytesToMebibytes(lu, size):
12505 """Converts a disk size in bytes to mebibytes.
12507 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12510 (mib, remainder) = divmod(size, 1024 * 1024)
12513 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12514 " to not overwrite existing data (%s bytes will not be"
12515 " wiped)", (1024 * 1024) - remainder)
12521 class LUInstanceGrowDisk(LogicalUnit):
12522 """Grow a disk of an instance.
12525 HPATH = "disk-grow"
12526 HTYPE = constants.HTYPE_INSTANCE
12529 def ExpandNames(self):
12530 self._ExpandAndLockInstance()
12531 self.needed_locks[locking.LEVEL_NODE] = []
12532 self.needed_locks[locking.LEVEL_NODE_RES] = []
12533 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12534 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12536 def DeclareLocks(self, level):
12537 if level == locking.LEVEL_NODE:
12538 self._LockInstancesNodes()
12539 elif level == locking.LEVEL_NODE_RES:
12541 self.needed_locks[locking.LEVEL_NODE_RES] = \
12542 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12544 def BuildHooksEnv(self):
12545 """Build hooks env.
12547 This runs on the master, the primary and all the secondaries.
12551 "DISK": self.op.disk,
12552 "AMOUNT": self.op.amount,
12553 "ABSOLUTE": self.op.absolute,
12555 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12558 def BuildHooksNodes(self):
12559 """Build hooks nodes.
12562 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12565 def CheckPrereq(self):
12566 """Check prerequisites.
12568 This checks that the instance is in the cluster.
12571 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12572 assert instance is not None, \
12573 "Cannot retrieve locked instance %s" % self.op.instance_name
12574 nodenames = list(instance.all_nodes)
12575 for node in nodenames:
12576 _CheckNodeOnline(self, node)
12578 self.instance = instance
12580 if instance.disk_template not in constants.DTS_GROWABLE:
12581 raise errors.OpPrereqError("Instance's disk layout does not support"
12582 " growing", errors.ECODE_INVAL)
12584 self.disk = instance.FindDisk(self.op.disk)
12586 if self.op.absolute:
12587 self.target = self.op.amount
12588 self.delta = self.target - self.disk.size
12590 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12591 "current disk size (%s)" %
12592 (utils.FormatUnit(self.target, "h"),
12593 utils.FormatUnit(self.disk.size, "h")),
12594 errors.ECODE_STATE)
12596 self.delta = self.op.amount
12597 self.target = self.disk.size + self.delta
12599 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12600 utils.FormatUnit(self.delta, "h"),
12601 errors.ECODE_INVAL)
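# Worked example (sizes in MiB, purely illustrative): for a 10240 MiB
# disk, an absolute request of 20480 gives delta=10240 and target=20480,
# while a relative request of 2048 gives delta=2048 and target=12288;
# shrinking (a negative delta or increment) is rejected by the checks
# above.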
12603 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12605 def _CheckDiskSpace(self, nodenames, req_vgspace):
12606 template = self.instance.disk_template
12607 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12608 # TODO: check the free disk space for file, when that feature will be
12610 nodes = map(self.cfg.GetNodeInfo, nodenames)
12611 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12614 # With exclusive storage we need to do something smarter than just looking
12615 # at free space; for now, let's simply abort the operation.
12616 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12617 " is enabled", errors.ECODE_STATE)
12618 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12620 def Exec(self, feedback_fn):
12621 """Execute disk grow.
12624 instance = self.instance
12627 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12628 assert (self.owned_locks(locking.LEVEL_NODE) ==
12629 self.owned_locks(locking.LEVEL_NODE_RES))
12631 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12633 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12635 raise errors.OpExecError("Cannot activate block device to grow")
12637 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12638 (self.op.disk, instance.name,
12639 utils.FormatUnit(self.delta, "h"),
12640 utils.FormatUnit(self.target, "h")))
12642 # First run all grow ops in dry-run mode
12643 for node in instance.all_nodes:
12644 self.cfg.SetDiskID(disk, node)
12645 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12647 result.Raise("Dry-run grow request failed to node %s" % node)
12650 # Get disk size from primary node for wiping
12651 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12652 result.Raise("Failed to retrieve disk size from node '%s'" %
12653 instance.primary_node)
12655 (disk_size_in_bytes, ) = result.payload
12657 if disk_size_in_bytes is None:
12658 raise errors.OpExecError("Failed to retrieve disk size from primary"
12659 " node '%s'" % instance.primary_node)
12661 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12663 assert old_disk_size >= disk.size, \
12664 ("Retrieved disk size too small (got %s, should be at least %s)" %
12665 (old_disk_size, disk.size))
12667 old_disk_size = None
12669 # We know that (as far as we can test) operations across different
12670 # nodes will succeed, time to run it for real on the backing storage
12671 for node in instance.all_nodes:
12672 self.cfg.SetDiskID(disk, node)
12673 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12675 result.Raise("Grow request failed to node %s" % node)
12677 # And now execute it for logical storage, on the primary node
12678 node = instance.primary_node
12679 self.cfg.SetDiskID(disk, node)
12680 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12682 result.Raise("Grow request failed to node %s" % node)
12684 disk.RecordGrow(self.delta)
12685 self.cfg.Update(instance, feedback_fn)
12687 # Changes have been recorded, release node lock
12688 _ReleaseLocks(self, locking.LEVEL_NODE)
12690 # Downgrade lock while waiting for sync
12691 self.glm.downgrade(locking.LEVEL_INSTANCE)
12693 assert wipe_disks ^ (old_disk_size is None)
12696 assert instance.disks[self.op.disk] == disk
12698 # Wipe newly added disk space
12699 _WipeDisks(self, instance,
12700 disks=[(self.op.disk, disk, old_disk_size)])
12702 if self.op.wait_for_sync:
12703 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12705 self.LogWarning("Disk syncing has not returned a good status; check"
12707 if instance.admin_state != constants.ADMINST_UP:
12708 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12709 elif instance.admin_state != constants.ADMINST_UP:
12710 self.LogWarning("Not shutting down the disk even if the instance is"
12711 " not supposed to be running because no wait for"
12712 " sync mode was requested")
12714 assert self.owned_locks(locking.LEVEL_NODE_RES)
12715 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12718 class LUInstanceQueryData(NoHooksLU):
12719 """Query runtime instance data.
12724 def ExpandNames(self):
12725 self.needed_locks = {}
12727 # Use locking if requested or when non-static information is wanted
12728 if not (self.op.static or self.op.use_locking):
12729 self.LogWarning("Non-static data requested, locks need to be acquired")
12730 self.op.use_locking = True
12732 if self.op.instances or not self.op.use_locking:
12733 # Expand instance names right here
12734 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12736 # Will use acquired locks
12737 self.wanted_names = None
12739 if self.op.use_locking:
12740 self.share_locks = _ShareAll()
12742 if self.wanted_names is None:
12743 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12745 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12747 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12748 self.needed_locks[locking.LEVEL_NODE] = []
12749 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12751 def DeclareLocks(self, level):
12752 if self.op.use_locking:
12753 if level == locking.LEVEL_NODEGROUP:
12754 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12756 # Lock all groups used by instances optimistically; this requires going
12757 # via the node before it's locked, requiring verification later on
12758 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12759 frozenset(group_uuid
12760 for instance_name in owned_instances
12762 self.cfg.GetInstanceNodeGroups(instance_name))
12764 elif level == locking.LEVEL_NODE:
12765 self._LockInstancesNodes()
12767 def CheckPrereq(self):
12768 """Check prerequisites.
12770 This only checks the optional instance list against the existing names.
12773 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12774 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12775 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12777 if self.wanted_names is None:
12778 assert self.op.use_locking, "Locking was not used"
12779 self.wanted_names = owned_instances
12781 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12783 if self.op.use_locking:
12784 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12787 assert not (owned_instances or owned_groups or owned_nodes)
12789 self.wanted_instances = instances.values()
12791 def _ComputeBlockdevStatus(self, node, instance, dev):
12792 """Returns the status of a block device
12795 if self.op.static or not node:
12798 self.cfg.SetDiskID(dev, node)
12800 result = self.rpc.call_blockdev_find(node, dev)
12804 result.Raise("Can't compute disk status for %s" % instance.name)
12806 status = result.payload
12810 return (status.dev_path, status.major, status.minor,
12811 status.sync_percent, status.estimated_time,
12812 status.is_degraded, status.ldisk_status)
12814 def _ComputeDiskStatus(self, instance, snode, dev):
12815 """Compute block device status.
12818 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12820 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12822 def _ComputeDiskStatusInner(self, instance, snode, dev):
12823 """Compute block device status.
12825 @attention: The device has to be annotated already.
12828 if dev.dev_type in constants.LDS_DRBD:
12829 # we change the snode then (otherwise we use the one passed in)
12830 if dev.logical_id[0] == instance.primary_node:
12831 snode = dev.logical_id[1]
12833 snode = dev.logical_id[0]
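# Note added for clarity: for DRBD8 devices logical_id is assumed to be
# (node_a, node_b, port, minor_a, minor_b, secret), so whichever of the first
# two entries is not the primary node is reported as the secondary below.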
12835 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12837 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12840 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12847 "iv_name": dev.iv_name,
12848 "dev_type": dev.dev_type,
12849 "logical_id": dev.logical_id,
12850 "physical_id": dev.physical_id,
12851 "pstatus": dev_pstatus,
12852 "sstatus": dev_sstatus,
12853 "children": dev_children,
12858 def Exec(self, feedback_fn):
12859 """Gather and return data"""
12862 cluster = self.cfg.GetClusterInfo()
12864 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12865 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12867 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12868 for node in nodes.values()))
12870 group2name_fn = lambda uuid: groups[uuid].name
12872 for instance in self.wanted_instances:
12873 pnode = nodes[instance.primary_node]
12875 if self.op.static or pnode.offline:
12876 remote_state = None
12878 self.LogWarning("Primary node %s is marked offline, returning static"
12879 " information only for instance %s" %
12880 (pnode.name, instance.name))
12882 remote_info = self.rpc.call_instance_info(instance.primary_node,
12884 instance.hypervisor)
12885 remote_info.Raise("Error checking node %s" % instance.primary_node)
12886 remote_info = remote_info.payload
12887 if remote_info and "state" in remote_info:
12888 remote_state = "up"
12890 if instance.admin_state == constants.ADMINST_UP:
12891 remote_state = "down"
12893 remote_state = instance.admin_state
12895 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12898 snodes_group_uuids = [nodes[snode_name].group
12899 for snode_name in instance.secondary_nodes]
12901 result[instance.name] = {
12902 "name": instance.name,
12903 "config_state": instance.admin_state,
12904 "run_state": remote_state,
12905 "pnode": instance.primary_node,
12906 "pnode_group_uuid": pnode.group,
12907 "pnode_group_name": group2name_fn(pnode.group),
12908 "snodes": instance.secondary_nodes,
12909 "snodes_group_uuids": snodes_group_uuids,
12910 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12912 # this happens to be the same format used for hooks
12913 "nics": _NICListToTuple(self, instance.nics),
12914 "disk_template": instance.disk_template,
12916 "hypervisor": instance.hypervisor,
12917 "network_port": instance.network_port,
12918 "hv_instance": instance.hvparams,
12919 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12920 "be_instance": instance.beparams,
12921 "be_actual": cluster.FillBE(instance),
12922 "os_instance": instance.osparams,
12923 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12924 "serial_no": instance.serial_no,
12925 "mtime": instance.mtime,
12926 "ctime": instance.ctime,
12927 "uuid": instance.uuid,
12933 def PrepareContainerMods(mods, private_fn):
12934 """Prepares a list of container modifications by adding a private data field.
12936 @type mods: list of tuples; (operation, index, parameters)
12937 @param mods: List of modifications
12938 @type private_fn: callable or None
12939 @param private_fn: Callable for constructing a private data field for a
12944 if private_fn is None:
12949 return [(op, idx, params, fn()) for (op, idx, params) in mods]
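# Illustrative sketch (not part of the original module): given
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}),
#           (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: "rw"})]
# PrepareContainerMods(mods, private_fn) returns the same tuples with a
# freshly constructed private object appended to each; passing
# private_fn=None appends None instead.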
12952 #: Type description for changes as returned by L{ApplyContainerMods}'s
12954 _TApplyContModsCbChanges = \
12955 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12956 ht.TNonEmptyString,
12961 def ApplyContainerMods(kind, container, chgdesc, mods,
12962 create_fn, modify_fn, remove_fn):
12963 """Applies descriptions in C{mods} to C{container}.
12966 @param kind: One-word item description
12967 @type container: list
12968 @param container: Container to modify
12969 @type chgdesc: None or list
12970 @param chgdesc: List of applied changes
12972 @param mods: Modifications as returned by L{PrepareContainerMods}
12973 @type create_fn: callable
12974 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12975 receives absolute item index, parameters and private data object as added
12976 by L{PrepareContainerMods}, returns tuple containing new item and changes
12978 @type modify_fn: callable
12979 @param modify_fn: Callback for modifying an existing item
12980 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12981 and private data object as added by L{PrepareContainerMods}, returns
12983 @type remove_fn: callable
12984 @param remove_fn: Callback on removing item; receives absolute item index,
12985 item and private data object as added by L{PrepareContainerMods}
12988 for (op, idx, params, private) in mods:
12991 absidx = len(container) - 1
12993 raise IndexError("Not accepting negative indices other than -1")
12994 elif idx > len(container):
12995 raise IndexError("Got %s index %s, but there are only %s" %
12996 (kind, idx, len(container)))
13002 if op == constants.DDM_ADD:
13003 # Calculate where item will be added
13005 addidx = len(container)
13009 if create_fn is None:
13012 (item, changes) = create_fn(addidx, params, private)
13015 container.append(item)
13018 assert idx <= len(container)
13019 # list.insert does so before the specified index
13020 container.insert(idx, item)
13022 # Retrieve existing item
13024 item = container[absidx]
13026 raise IndexError("Invalid %s index %s" % (kind, idx))
13028 if op == constants.DDM_REMOVE:
13031 if remove_fn is not None:
13032 remove_fn(absidx, item, private)
13034 changes = [("%s/%s" % (kind, absidx), "remove")]
13036 assert container[absidx] == item
13037 del container[absidx]
13038 elif op == constants.DDM_MODIFY:
13039 if modify_fn is not None:
13040 changes = modify_fn(absidx, item, params, private)
13042 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13044 assert _TApplyContModsCbChanges(changes)
13046 if not (chgdesc is None or changes is None):
13047 chgdesc.extend(changes)
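# Usage sketch (added for illustration, assuming a 'diskmod' list prepared by
# PrepareContainerMods and the callbacks defined by LUInstanceSetParams):
#
#   chgdesc = []
#   ApplyContainerMods("disk", instance.disks, chgdesc, diskmod,
#                      create_fn, modify_fn, remove_fn)
#   # chgdesc now holds entries such as ("disk/0", "add:size=1024,mode=rw"),
#   # matching the _TApplyContModsCbChanges description above.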
13050 def _UpdateIvNames(base_index, disks):
13051 """Updates the C{iv_name} attribute of disks.
13053 @type disks: list of L{objects.Disk}
13056 for (idx, disk) in enumerate(disks):
13057 disk.iv_name = "disk/%s" % (base_index + idx, )
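# Example (for illustration only): _UpdateIvNames(0, instance.disks) resets
# the iv_name attributes to "disk/0", "disk/1", ..., which is how Exec below
# re-numbers the container after disk additions and removals.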
13060 class _InstNicModPrivate:
13061 """Data structure for network interface modifications.
13063 Used by L{LUInstanceSetParams}.
13066 def __init__(self):
13071 class LUInstanceSetParams(LogicalUnit):
13072 """Modifies an instance's parameters.
13075 HPATH = "instance-modify"
13076 HTYPE = constants.HTYPE_INSTANCE
13080 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13081 assert ht.TList(mods)
13082 assert not mods or len(mods[0]) in (2, 3)
13084 if mods and len(mods[0]) == 2:
13088 for op, params in mods:
13089 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13090 result.append((op, -1, params))
13094 raise errors.OpPrereqError("Only one %s add or remove operation is"
13095 " supported at a time" % kind,
13096 errors.ECODE_INVAL)
13098 result.append((constants.DDM_MODIFY, op, params))
13100 assert verify_fn(result)
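# Illustrative example (not in the original source): the legacy 2-tuple
# syntax [("add", {"size": 1024}), (0, {"mode": "rw"})] is upgraded here to
# the 3-tuple form [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "rw"})]
# before being handed to the verification function.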
13107 def _CheckMods(kind, mods, key_types, item_fn):
13108 """Ensures requested disk/NIC modifications are valid.
13111 for (op, _, params) in mods:
13112 assert ht.TDict(params)
13114 # If 'key_types' is an empty dict, we assume we have an
13115 # 'ext' template and thus do not ForceDictType
13117 utils.ForceDictType(params, key_types)
13119 if op == constants.DDM_REMOVE:
13121 raise errors.OpPrereqError("No settings should be passed when"
13122 " removing a %s" % kind,
13123 errors.ECODE_INVAL)
13124 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13125 item_fn(op, params)
13127 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13130 def _VerifyDiskModification(op, params):
13131 """Verifies a disk modification.
13134 if op == constants.DDM_ADD:
13135 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13136 if mode not in constants.DISK_ACCESS_SET:
13137 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13138 errors.ECODE_INVAL)
13140 size = params.get(constants.IDISK_SIZE, None)
13142 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13143 constants.IDISK_SIZE, errors.ECODE_INVAL)
13147 except (TypeError, ValueError), err:
13148 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13149 errors.ECODE_INVAL)
13151 params[constants.IDISK_SIZE] = size
13153 elif op == constants.DDM_MODIFY:
13154 if constants.IDISK_SIZE in params:
13155 raise errors.OpPrereqError("Disk size change not possible, use"
13156 " grow-disk", errors.ECODE_INVAL)
13157 if constants.IDISK_MODE not in params:
13158 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13159 " modification supported, but missing",
13160 errors.ECODE_NOENT)
13161 if len(params) > 1:
13162 raise errors.OpPrereqError("Disk modification doesn't support"
13163 " additional arbitrary parameters",
13164 errors.ECODE_INVAL)
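# Example parameter sets accepted above (illustrative only): an add such as
# {constants.IDISK_SIZE: 2048, constants.IDISK_MODE: constants.DISK_RDWR}
# and a modify limited to {constants.IDISK_MODE: constants.DISK_RDONLY};
# size changes have to go through the separate grow-disk operation instead.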
13167 def _VerifyNicModification(op, params):
13168 """Verifies a network interface modification.
13171 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13172 ip = params.get(constants.INIC_IP, None)
13173 req_net = params.get(constants.INIC_NETWORK, None)
13174 link = params.get(constants.NIC_LINK, None)
13175 mode = params.get(constants.NIC_MODE, None)
13176 if req_net is not None:
13177 if req_net.lower() == constants.VALUE_NONE:
13178 params[constants.INIC_NETWORK] = None
13180 elif link is not None or mode is not None:
13181 raise errors.OpPrereqError("If a network is given, mode or link"
13182 " should not be set",
13183 errors.ECODE_INVAL)
13185 if op == constants.DDM_ADD:
13186 macaddr = params.get(constants.INIC_MAC, None)
13187 if macaddr is None:
13188 params[constants.INIC_MAC] = constants.VALUE_AUTO
13191 if ip.lower() == constants.VALUE_NONE:
13192 params[constants.INIC_IP] = None
13194 if ip.lower() == constants.NIC_IP_POOL:
13195 if op == constants.DDM_ADD and req_net is None:
13196 raise errors.OpPrereqError("If ip=pool, parameter network"
13198 errors.ECODE_INVAL)
13200 if not netutils.IPAddress.IsValid(ip):
13201 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13202 errors.ECODE_INVAL)
13204 if constants.INIC_MAC in params:
13205 macaddr = params[constants.INIC_MAC]
13206 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13207 macaddr = utils.NormalizeAndValidateMac(macaddr)
13209 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13210 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13211 " modifying an existing NIC",
13212 errors.ECODE_INVAL)
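# Example NIC modifications passing the checks above (illustrative only):
#   add:    {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
#   modify: {constants.INIC_MAC: "aa:00:00:35:ac:01"}
# whereas ip="pool" without a network, or mac="auto" on a modify, is rejected.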
13214 def CheckArguments(self):
13215 if not (self.op.nics or self.op.disks or self.op.disk_template or
13216 self.op.hvparams or self.op.beparams or self.op.os_name or
13217 self.op.offline is not None or self.op.runtime_mem):
13218 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13220 if self.op.hvparams:
13221 _CheckGlobalHvParams(self.op.hvparams)
13223 self.op.disks = self._UpgradeDiskNicMods(
13224 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13225 self.op.nics = self._UpgradeDiskNicMods(
13226 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13228 if self.op.disks and self.op.disk_template is not None:
13229 raise errors.OpPrereqError("Disk template conversion and other disk"
13230 " changes not supported at the same time",
13231 errors.ECODE_INVAL)
13233 if (self.op.disk_template and
13234 self.op.disk_template in constants.DTS_INT_MIRROR and
13235 self.op.remote_node is None):
13236 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13237 " one requires specifying a secondary node",
13238 errors.ECODE_INVAL)
13240 # Check NIC modifications
13241 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13242 self._VerifyNicModification)
13244 def ExpandNames(self):
13245 self._ExpandAndLockInstance()
13246 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13247 # Can't even acquire node locks in shared mode as upcoming changes in
13248 # Ganeti 2.6 will start to modify the node object on disk conversion
13249 self.needed_locks[locking.LEVEL_NODE] = []
13250 self.needed_locks[locking.LEVEL_NODE_RES] = []
13251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13252 # Lock node group to look up the ipolicy
13253 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13255 def DeclareLocks(self, level):
13256 if level == locking.LEVEL_NODEGROUP:
13257 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13258 # Acquire locks for the instance's nodegroups optimistically. Needs
13259 # to be verified in CheckPrereq
13260 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13261 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13262 elif level == locking.LEVEL_NODE:
13263 self._LockInstancesNodes()
13264 if self.op.disk_template and self.op.remote_node:
13265 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13266 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13267 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13269 self.needed_locks[locking.LEVEL_NODE_RES] = \
13270 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13272 def BuildHooksEnv(self):
13273 """Build hooks env.
13275 This runs on the master, primary and secondaries.
13279 if constants.BE_MINMEM in self.be_new:
13280 args["minmem"] = self.be_new[constants.BE_MINMEM]
13281 if constants.BE_MAXMEM in self.be_new:
13282 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13283 if constants.BE_VCPUS in self.be_new:
13284 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13285 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13286 # information at all.
13288 if self._new_nics is not None:
13291 for nic in self._new_nics:
13292 n = copy.deepcopy(nic)
13293 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13294 n.nicparams = nicparams
13295 nics.append(_NICToTuple(self, n))
13297 args["nics"] = nics
13299 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13300 if self.op.disk_template:
13301 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13302 if self.op.runtime_mem:
13303 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13307 def BuildHooksNodes(self):
13308 """Build hooks nodes.
13311 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13314 def _PrepareNicModification(self, params, private, old_ip, old_net,
13315 old_params, cluster, pnode):
13317 update_params_dict = dict([(key, params[key])
13318 for key in constants.NICS_PARAMETERS
13321 req_link = update_params_dict.get(constants.NIC_LINK, None)
13322 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13324 new_net = params.get(constants.INIC_NETWORK, old_net)
13325 if new_net is not None:
13326 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13327 if netparams is None:
13328 raise errors.OpPrereqError("No netparams found for the network"
13329 " %s, probably not connected" % new_net,
13330 errors.ECODE_INVAL)
13331 new_params = dict(netparams)
13333 new_params = _GetUpdatedParams(old_params, update_params_dict)
13335 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13337 new_filled_params = cluster.SimpleFillNIC(new_params)
13338 objects.NIC.CheckParameterSyntax(new_filled_params)
13340 new_mode = new_filled_params[constants.NIC_MODE]
13341 if new_mode == constants.NIC_MODE_BRIDGED:
13342 bridge = new_filled_params[constants.NIC_LINK]
13343 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13345 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13347 self.warn.append(msg)
13349 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13351 elif new_mode == constants.NIC_MODE_ROUTED:
13352 ip = params.get(constants.INIC_IP, old_ip)
13354 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13355 " on a routed NIC", errors.ECODE_INVAL)
13357 elif new_mode == constants.NIC_MODE_OVS:
13358 # TODO: check OVS link
13359 self.LogInfo("OVS links are currently not checked for correctness")
13361 if constants.INIC_MAC in params:
13362 mac = params[constants.INIC_MAC]
13364 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13365 errors.ECODE_INVAL)
13366 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13367 # otherwise generate the MAC address
13368 params[constants.INIC_MAC] = \
13369 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13371 # or validate/reserve the current one
13373 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13374 except errors.ReservationError:
13375 raise errors.OpPrereqError("MAC address '%s' already in use"
13376 " in cluster" % mac,
13377 errors.ECODE_NOTUNIQUE)
13378 elif new_net != old_net:
13380 def get_net_prefix(net):
13382 uuid = self.cfg.LookupNetwork(net)
13384 nobj = self.cfg.GetNetwork(uuid)
13385 return nobj.mac_prefix
13388 new_prefix = get_net_prefix(new_net)
13389 old_prefix = get_net_prefix(old_net)
13390 if old_prefix != new_prefix:
13391 params[constants.INIC_MAC] = \
13392 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13394 # if there is a change in nic/network configuration
13395 new_ip = params.get(constants.INIC_IP, old_ip)
13396 if (new_ip, new_net) != (old_ip, old_net):
13399 if new_ip.lower() == constants.NIC_IP_POOL:
13401 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13402 except errors.ReservationError:
13403 raise errors.OpPrereqError("Unable to get a free IP"
13404 " from the address pool",
13405 errors.ECODE_STATE)
13406 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13407 params[constants.INIC_IP] = new_ip
13408 elif new_ip != old_ip or new_net != old_net:
13410 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13411 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13412 except errors.ReservationError:
13413 raise errors.OpPrereqError("IP %s not available in network %s" %
13415 errors.ECODE_NOTUNIQUE)
13416 elif new_ip.lower() == constants.NIC_IP_POOL:
13417 raise errors.OpPrereqError("ip=pool, but no network found",
13418 errors.ECODE_INVAL)
13421 elif self.op.conflicts_check:
13422 _CheckForConflictingIp(self, new_ip, pnode)
13427 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13428 except errors.AddressPoolError:
13429 logging.warning("Release IP %s not contained in network %s",
13432 # there are no changes in (net, ip) tuple
13433 elif (old_net is not None and
13434 (req_link is not None or req_mode is not None)):
13435 raise errors.OpPrereqError("Not allowed to change link or mode of"
13436 " a NIC that is connected to a network",
13437 errors.ECODE_INVAL)
13439 private.params = new_params
13440 private.filled = new_filled_params
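# Note added for clarity: 'private' is the _InstNicModPrivate object created
# by PrepareContainerMods; the filled parameters stored here are picked up
# later by _CreateNewNic/_ApplyNicMods when the NIC objects are actually
# built or updated.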
13442 def _PreCheckDiskTemplate(self, pnode_info):
13443 """CheckPrereq checks related to a new disk template."""
13444 # Arguments are passed to avoid configuration lookups
13445 instance = self.instance
13446 pnode = instance.primary_node
13447 cluster = self.cluster
13448 if instance.disk_template == self.op.disk_template:
13449 raise errors.OpPrereqError("Instance already has disk template %s" %
13450 instance.disk_template, errors.ECODE_INVAL)
13452 if (instance.disk_template,
13453 self.op.disk_template) not in self._DISK_CONVERSIONS:
13454 raise errors.OpPrereqError("Unsupported disk template conversion from"
13455 " %s to %s" % (instance.disk_template,
13456 self.op.disk_template),
13457 errors.ECODE_INVAL)
13458 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13459 msg="cannot change disk template")
13460 if self.op.disk_template in constants.DTS_INT_MIRROR:
13461 if self.op.remote_node == pnode:
13462 raise errors.OpPrereqError("Given new secondary node %s is the same"
13463 " as the primary node of the instance" %
13464 self.op.remote_node, errors.ECODE_STATE)
13465 _CheckNodeOnline(self, self.op.remote_node)
13466 _CheckNodeNotDrained(self, self.op.remote_node)
13467 # FIXME: here we assume that the old instance type is DT_PLAIN
13468 assert instance.disk_template == constants.DT_PLAIN
13469 disks = [{constants.IDISK_SIZE: d.size,
13470 constants.IDISK_VG: d.logical_id[0]}
13471 for d in instance.disks]
13472 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13473 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13475 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13476 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13477 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13479 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13480 ignore=self.op.ignore_ipolicy)
13481 if pnode_info.group != snode_info.group:
13482 self.LogWarning("The primary and secondary nodes are in two"
13483 " different node groups; the disk parameters"
13484 " from the first disk's node group will be"
13487 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13488 # Make sure none of the nodes require exclusive storage
13489 nodes = [pnode_info]
13490 if self.op.disk_template in constants.DTS_INT_MIRROR:
13492 nodes.append(snode_info)
13493 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13494 if compat.any(map(has_es, nodes)):
13495 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13496 " storage is enabled" % (instance.disk_template,
13497 self.op.disk_template))
13498 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13500 def CheckPrereq(self):
13501 """Check prerequisites.
13503 This only checks the instance list against the existing names.
13506 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13507 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13509 cluster = self.cluster = self.cfg.GetClusterInfo()
13510 assert self.instance is not None, \
13511 "Cannot retrieve locked instance %s" % self.op.instance_name
13513 pnode = instance.primary_node
13514 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13515 nodelist = list(instance.all_nodes)
13516 pnode_info = self.cfg.GetNodeInfo(pnode)
13517 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13519 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13520 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13521 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13523 # dictionary with instance information after the modification
13526 # Check disk modifications. This is done here and not in CheckArguments
13527 # (as with NICs), because we need to know the instance's disk template
13528 if instance.disk_template == constants.DT_EXT:
13529 self._CheckMods("disk", self.op.disks, {},
13530 self._VerifyDiskModification)
13532 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13533 self._VerifyDiskModification)
13535 # Prepare disk/NIC modifications
13536 self.diskmod = PrepareContainerMods(self.op.disks, None)
13537 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13539 # Check the validity of the `provider' parameter
13540 if instance.disk_template == constants.DT_EXT:
13541 for mod in self.diskmod:
13542 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13543 if mod[0] == constants.DDM_ADD:
13544 if ext_provider is None:
13545 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13546 " '%s' missing, during disk add" %
13548 constants.IDISK_PROVIDER),
13549 errors.ECODE_NOENT)
13550 elif mod[0] == constants.DDM_MODIFY:
13552 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13554 constants.IDISK_PROVIDER,
13555 errors.ECODE_INVAL)
13557 for mod in self.diskmod:
13558 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13559 if ext_provider is not None:
13560 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13561 " instances of type '%s'" %
13562 (constants.IDISK_PROVIDER,
13564 errors.ECODE_INVAL)
13567 if self.op.os_name and not self.op.force:
13568 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13569 self.op.force_variant)
13570 instance_os = self.op.os_name
13572 instance_os = instance.os
13574 assert not (self.op.disk_template and self.op.disks), \
13575 "Can't modify disk template and apply disk changes at the same time"
13577 if self.op.disk_template:
13578 self._PreCheckDiskTemplate(pnode_info)
13580 # hvparams processing
13581 if self.op.hvparams:
13582 hv_type = instance.hypervisor
13583 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13584 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13585 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13588 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13589 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13590 self.hv_proposed = self.hv_new = hv_new # the new actual values
13591 self.hv_inst = i_hvdict # the new dict (without defaults)
13593 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13595 self.hv_new = self.hv_inst = {}
13597 # beparams processing
13598 if self.op.beparams:
13599 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13601 objects.UpgradeBeParams(i_bedict)
13602 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13603 be_new = cluster.SimpleFillBE(i_bedict)
13604 self.be_proposed = self.be_new = be_new # the new actual values
13605 self.be_inst = i_bedict # the new dict (without defaults)
13607 self.be_new = self.be_inst = {}
13608 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13609 be_old = cluster.FillBE(instance)
13611 # CPU param validation -- checking every time a parameter is
13612 # changed to cover all cases where either CPU mask or vcpus have
13614 if (constants.BE_VCPUS in self.be_proposed and
13615 constants.HV_CPU_MASK in self.hv_proposed):
13617 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13618 # Verify mask is consistent with number of vCPUs. Can skip this
13619 # test if only 1 entry in the CPU mask, which means same mask
13620 # is applied to all vCPUs.
13621 if (len(cpu_list) > 1 and
13622 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13623 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13625 (self.be_proposed[constants.BE_VCPUS],
13626 self.hv_proposed[constants.HV_CPU_MASK]),
13627 errors.ECODE_INVAL)
13629 # Only perform this test if a new CPU mask is given
13630 if constants.HV_CPU_MASK in self.hv_new:
13631 # Calculate the largest CPU number requested
13632 max_requested_cpu = max(map(max, cpu_list))
13633 # Check that all of the instance's nodes have enough physical CPUs to
13634 # satisfy the requested CPU mask
13635 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13636 max_requested_cpu + 1, instance.hypervisor)
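# Worked example (illustrative, assuming the usual multi-mask syntax where
# per-vCPU masks are separated by ":"): with BE_VCPUS=2 a mask of "1:2-3"
# parses into two entries and is accepted, while "1:2:4" would trigger the
# vCPU-count mismatch above; the highest CPU referenced (here 3) also has to
# exist physically on every node of the instance.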
13638 # osparams processing
13639 if self.op.osparams:
13640 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13641 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13642 self.os_inst = i_osdict # the new dict (without defaults)
13648 #TODO(dynmem): do the appropriate check involving MINMEM
13649 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13650 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13651 mem_check_list = [pnode]
13652 if be_new[constants.BE_AUTO_BALANCE]:
13653 # either we changed auto_balance to yes or it was from before
13654 mem_check_list.extend(instance.secondary_nodes)
13655 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13656 instance.hypervisor)
13657 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13658 [instance.hypervisor], False)
13659 pninfo = nodeinfo[pnode]
13660 msg = pninfo.fail_msg
13662 # Assume the primary node is unreachable and go ahead
13663 self.warn.append("Can't get info from primary node %s: %s" %
13666 (_, _, (pnhvinfo, )) = pninfo.payload
13667 if not isinstance(pnhvinfo.get("memory_free", None), int):
13668 self.warn.append("Node data from primary node %s doesn't contain"
13669 " free memory information" % pnode)
13670 elif instance_info.fail_msg:
13671 self.warn.append("Can't get instance runtime information: %s" %
13672 instance_info.fail_msg)
13674 if instance_info.payload:
13675 current_mem = int(instance_info.payload["memory"])
13677 # Assume instance not running
13678 # (there is a slight race condition here, but it's not very
13679 # probable, and we have no other way to check)
13680 # TODO: Describe race condition
13682 #TODO(dynmem): do the appropriate check involving MINMEM
13683 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13684 pnhvinfo["memory_free"])
13686 raise errors.OpPrereqError("This change will prevent the instance"
13687 " from starting, due to %d MB of memory"
13688 " missing on its primary node" %
13689 miss_mem, errors.ECODE_NORES)
13691 if be_new[constants.BE_AUTO_BALANCE]:
13692 for node, nres in nodeinfo.items():
13693 if node not in instance.secondary_nodes:
13695 nres.Raise("Can't get info from secondary node %s" % node,
13696 prereq=True, ecode=errors.ECODE_STATE)
13697 (_, _, (nhvinfo, )) = nres.payload
13698 if not isinstance(nhvinfo.get("memory_free", None), int):
13699 raise errors.OpPrereqError("Secondary node %s didn't return free"
13700 " memory information" % node,
13701 errors.ECODE_STATE)
13702 #TODO(dynmem): do the appropriate check involving MINMEM
13703 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13704 raise errors.OpPrereqError("This change will prevent the instance"
13705 " from failover to its secondary node"
13706 " %s, due to not enough memory" % node,
13707 errors.ECODE_STATE)
13709 if self.op.runtime_mem:
13710 remote_info = self.rpc.call_instance_info(instance.primary_node,
13712 instance.hypervisor)
13713 remote_info.Raise("Error checking node %s" % instance.primary_node)
13714 if not remote_info.payload: # not running already
13715 raise errors.OpPrereqError("Instance %s is not running" %
13716 instance.name, errors.ECODE_STATE)
13718 current_memory = remote_info.payload["memory"]
13719 if (not self.op.force and
13720 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13721 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13722 raise errors.OpPrereqError("Instance %s must have between %d"
13723 " and %d MB of memory unless --force is"
13726 self.be_proposed[constants.BE_MINMEM],
13727 self.be_proposed[constants.BE_MAXMEM]),
13728 errors.ECODE_INVAL)
13730 delta = self.op.runtime_mem - current_memory
13732 _CheckNodeFreeMemory(self, instance.primary_node,
13733 "ballooning memory for instance %s" %
13734 instance.name, delta, instance.hypervisor)
13736 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13737 raise errors.OpPrereqError("Disk operations not supported for"
13738 " diskless instances", errors.ECODE_INVAL)
13740 def _PrepareNicCreate(_, params, private):
13741 self._PrepareNicModification(params, private, None, None,
13742 {}, cluster, pnode)
13743 return (None, None)
13745 def _PrepareNicMod(_, nic, params, private):
13746 self._PrepareNicModification(params, private, nic.ip, nic.network,
13747 nic.nicparams, cluster, pnode)
13750 def _PrepareNicRemove(_, params, __):
13752 net = params.network
13753 if net is not None and ip is not None:
13754 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13756 # Verify NIC changes (operating on copy)
13757 nics = instance.nics[:]
13758 ApplyContainerMods("NIC", nics, None, self.nicmod,
13759 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13760 if len(nics) > constants.MAX_NICS:
13761 raise errors.OpPrereqError("Instance has too many network interfaces"
13762 " (%d), cannot add more" % constants.MAX_NICS,
13763 errors.ECODE_STATE)
13765 # Verify disk changes (operating on a copy)
13766 disks = instance.disks[:]
13767 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13768 if len(disks) > constants.MAX_DISKS:
13769 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13770 " more" % constants.MAX_DISKS,
13771 errors.ECODE_STATE)
13772 disk_sizes = [disk.size for disk in instance.disks]
13773 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13774 self.diskmod if op == constants.DDM_ADD)
13775 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13776 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13778 if self.op.offline is not None and self.op.offline:
13779 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13780 msg="can't change to offline")
13782 # Pre-compute NIC changes (necessary to use result in hooks)
13783 self._nic_chgdesc = []
13785 # Operate on copies as this is still in prereq
13786 nics = [nic.Copy() for nic in instance.nics]
13787 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13788 self._CreateNewNic, self._ApplyNicMods, None)
13789 self._new_nics = nics
13790 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13792 self._new_nics = None
13793 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13795 if not self.op.ignore_ipolicy:
13796 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13799 # Fill ispec with backend parameters
13800 ispec[constants.ISPEC_SPINDLE_USE] = \
13801 self.be_new.get(constants.BE_SPINDLE_USE, None)
13802 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13805 # Copy ispec to verify parameters with min/max values separately
13806 ispec_max = ispec.copy()
13807 ispec_max[constants.ISPEC_MEM_SIZE] = \
13808 self.be_new.get(constants.BE_MAXMEM, None)
13809 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13810 ispec_min = ispec.copy()
13811 ispec_min[constants.ISPEC_MEM_SIZE] = \
13812 self.be_new.get(constants.BE_MINMEM, None)
13813 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13815 if (res_max or res_min):
13816 # FIXME: Improve error message by including information about whether
13817 # the upper or lower limit of the parameter fails the ipolicy.
13818 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13819 (group_info, group_info.name,
13820 utils.CommaJoin(set(res_max + res_min))))
13821 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13823 def _ConvertPlainToDrbd(self, feedback_fn):
13824 """Converts an instance from plain to drbd.
13827 feedback_fn("Converting template to drbd")
13828 instance = self.instance
13829 pnode = instance.primary_node
13830 snode = self.op.remote_node
13832 assert instance.disk_template == constants.DT_PLAIN
13834 # create a fake disk info for _GenerateDiskTemplate
13835 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13836 constants.IDISK_VG: d.logical_id[0]}
13837 for d in instance.disks]
13838 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13839 instance.name, pnode, [snode],
13840 disk_info, None, None, 0, feedback_fn,
13842 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13844 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13845 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13846 info = _GetInstanceInfoText(instance)
13847 feedback_fn("Creating additional volumes...")
13848 # first, create the missing data and meta devices
13849 for disk in anno_disks:
13850 # unfortunately this is... not too nice
13851 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13852 info, True, p_excl_stor)
13853 for child in disk.children:
13854 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13856 # at this stage, all new LVs have been created, we can rename the
13858 feedback_fn("Renaming original volumes...")
13859 rename_list = [(o, n.children[0].logical_id)
13860 for (o, n) in zip(instance.disks, new_disks)]
13861 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13862 result.Raise("Failed to rename original LVs")
13864 feedback_fn("Initializing DRBD devices...")
13865 # all child devices are in place, we can now create the DRBD devices
13866 for disk in anno_disks:
13867 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13868 f_create = node == pnode
13869 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13872 # at this point, the instance has been modified
13873 instance.disk_template = constants.DT_DRBD8
13874 instance.disks = new_disks
13875 self.cfg.Update(instance, feedback_fn)
13877 # Release node locks while waiting for sync
13878 _ReleaseLocks(self, locking.LEVEL_NODE)
13880 # disks are created, waiting for sync
13881 disk_abort = not _WaitForSync(self, instance,
13882 oneshot=not self.op.wait_for_sync)
13884 raise errors.OpExecError("There are some degraded disks for"
13885 " this instance, please cleanup manually")
13887 # Node resource locks will be released by caller
13889 def _ConvertDrbdToPlain(self, feedback_fn):
13890 """Converts an instance from drbd to plain.
13893 instance = self.instance
13895 assert len(instance.secondary_nodes) == 1
13896 assert instance.disk_template == constants.DT_DRBD8
13898 pnode = instance.primary_node
13899 snode = instance.secondary_nodes[0]
13900 feedback_fn("Converting template to plain")
13902 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13903 new_disks = [d.children[0] for d in instance.disks]
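# Note added for clarity: a DRBD8 disk's children are assumed to be
# [data LV, metadata LV], so keeping children[0] retains the data volume
# while the metadata volumes are removed further down.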
13905 # copy over size and mode
13906 for parent, child in zip(old_disks, new_disks):
13907 child.size = parent.size
13908 child.mode = parent.mode
13910 # this is a DRBD disk, return its port to the pool
13911 # NOTE: this must be done right before the call to cfg.Update!
13912 for disk in old_disks:
13913 tcp_port = disk.logical_id[2]
13914 self.cfg.AddTcpUdpPort(tcp_port)
13916 # update instance structure
13917 instance.disks = new_disks
13918 instance.disk_template = constants.DT_PLAIN
13919 self.cfg.Update(instance, feedback_fn)
13921 # Release locks in case removing disks takes a while
13922 _ReleaseLocks(self, locking.LEVEL_NODE)
13924 feedback_fn("Removing volumes on the secondary node...")
13925 for disk in old_disks:
13926 self.cfg.SetDiskID(disk, snode)
13927 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13929 self.LogWarning("Could not remove block device %s on node %s,"
13930 " continuing anyway: %s", disk.iv_name, snode, msg)
13932 feedback_fn("Removing unneeded volumes on the primary node...")
13933 for idx, disk in enumerate(old_disks):
13934 meta = disk.children[1]
13935 self.cfg.SetDiskID(meta, pnode)
13936 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13938 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13939 " continuing anyway: %s", idx, pnode, msg)
13941 def _CreateNewDisk(self, idx, params, _):
13942 """Creates a new disk.
13945 instance = self.instance
13948 if instance.disk_template in constants.DTS_FILEBASED:
13949 (file_driver, file_path) = instance.disks[0].logical_id
13950 file_path = os.path.dirname(file_path)
13952 file_driver = file_path = None
13955 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13956 instance.primary_node, instance.secondary_nodes,
13957 [params], file_path, file_driver, idx,
13958 self.Log, self.diskparams)[0]
13960 info = _GetInstanceInfoText(instance)
13962 logging.info("Creating volume %s for instance %s",
13963 disk.iv_name, instance.name)
13964 # Note: this needs to be kept in sync with _CreateDisks
13966 for node in instance.all_nodes:
13967 f_create = (node == instance.primary_node)
13969 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13970 except errors.OpExecError, err:
13971 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13972 disk.iv_name, disk, node, err)
13975 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13979 def _ModifyDisk(idx, disk, params, _):
13980 """Modifies a disk.
13983 disk.mode = params[constants.IDISK_MODE]
13986 ("disk.mode/%d" % idx, disk.mode),
13989 def _RemoveDisk(self, idx, root, _):
13993 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13994 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13995 self.cfg.SetDiskID(disk, node)
13996 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13998 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13999 " continuing anyway", idx, node, msg)
14001 # if this is a DRBD disk, return its port to the pool
14002 if root.dev_type in constants.LDS_DRBD:
14003 self.cfg.AddTcpUdpPort(root.logical_id[2])
14006 def _CreateNewNic(idx, params, private):
14007 """Creates data structure for a new network interface.
14010 mac = params[constants.INIC_MAC]
14011 ip = params.get(constants.INIC_IP, None)
14012 net = params.get(constants.INIC_NETWORK, None)
14013 #TODO: not private.filled?? can a nic have no nicparams??
14014 nicparams = private.filled
14016 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
14018 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14019 (mac, ip, private.filled[constants.NIC_MODE],
14020 private.filled[constants.NIC_LINK],
14025 def _ApplyNicMods(idx, nic, params, private):
14026 """Modifies a network interface.
14031 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
14033 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14034 setattr(nic, key, params[key])
14037 nic.nicparams = private.filled
14039 for (key, val) in nic.nicparams.items():
14040 changes.append(("nic.%s/%d" % (key, idx), val))
14044 def Exec(self, feedback_fn):
14045 """Modifies an instance.
14047 All parameters take effect only at the next restart of the instance.
14050 # Process here the warnings from CheckPrereq, as we don't have a
14051 # feedback_fn there.
14052 # TODO: Replace with self.LogWarning
14053 for warn in self.warn:
14054 feedback_fn("WARNING: %s" % warn)
14056 assert ((self.op.disk_template is None) ^
14057 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14058 "Not owning any node resource locks"
14061 instance = self.instance
14064 if self.op.runtime_mem:
14065 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14067 self.op.runtime_mem)
14068 rpcres.Raise("Cannot modify instance runtime memory")
14069 result.append(("runtime_memory", self.op.runtime_mem))
14071 # Apply disk changes
14072 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14073 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14074 _UpdateIvNames(0, instance.disks)
14076 if self.op.disk_template:
14078 check_nodes = set(instance.all_nodes)
14079 if self.op.remote_node:
14080 check_nodes.add(self.op.remote_node)
14081 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14082 owned = self.owned_locks(level)
14083 assert not (check_nodes - owned), \
14084 ("Not owning the correct locks, owning %r, expected at least %r" %
14085 (owned, check_nodes))
14087 r_shut = _ShutdownInstanceDisks(self, instance)
14089 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14090 " proceed with disk template conversion")
14091 mode = (instance.disk_template, self.op.disk_template)
14093 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14095 self.cfg.ReleaseDRBDMinors(instance.name)
14097 result.append(("disk_template", self.op.disk_template))
14099 assert instance.disk_template == self.op.disk_template, \
14100 ("Expected disk template '%s', found '%s'" %
14101 (self.op.disk_template, instance.disk_template))
14103 # Release node and resource locks if there are any (they might already have
14104 # been released during disk conversion)
14105 _ReleaseLocks(self, locking.LEVEL_NODE)
14106 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14108 # Apply NIC changes
14109 if self._new_nics is not None:
14110 instance.nics = self._new_nics
14111 result.extend(self._nic_chgdesc)
14114 if self.op.hvparams:
14115 instance.hvparams = self.hv_inst
14116 for key, val in self.op.hvparams.iteritems():
14117 result.append(("hv/%s" % key, val))
14120 if self.op.beparams:
14121 instance.beparams = self.be_inst
14122 for key, val in self.op.beparams.iteritems():
14123 result.append(("be/%s" % key, val))
14126 if self.op.os_name:
14127 instance.os = self.op.os_name
14130 if self.op.osparams:
14131 instance.osparams = self.os_inst
14132 for key, val in self.op.osparams.iteritems():
14133 result.append(("os/%s" % key, val))
14135 if self.op.offline is None:
14138 elif self.op.offline:
14139 # Mark instance as offline
14140 self.cfg.MarkInstanceOffline(instance.name)
14141 result.append(("admin_state", constants.ADMINST_OFFLINE))
14143 # Mark instance as online, but stopped
14144 self.cfg.MarkInstanceDown(instance.name)
14145 result.append(("admin_state", constants.ADMINST_DOWN))
14147 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14149 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14150 self.owned_locks(locking.LEVEL_NODE)), \
14151 "All node locks should have been released by now"
14155 _DISK_CONVERSIONS = {
14156 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14157 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14161 class LUInstanceChangeGroup(LogicalUnit):
14162 HPATH = "instance-change-group"
14163 HTYPE = constants.HTYPE_INSTANCE
14166 def ExpandNames(self):
14167 self.share_locks = _ShareAll()
14169 self.needed_locks = {
14170 locking.LEVEL_NODEGROUP: [],
14171 locking.LEVEL_NODE: [],
14172 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14175 self._ExpandAndLockInstance()
14177 if self.op.target_groups:
14178 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14179 self.op.target_groups)
14181 self.req_target_uuids = None
14183 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14185 def DeclareLocks(self, level):
14186 if level == locking.LEVEL_NODEGROUP:
14187 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14189 if self.req_target_uuids:
14190 lock_groups = set(self.req_target_uuids)
14192 # Lock all groups used by instance optimistically; this requires going
14193 # via the node before it's locked, requiring verification later on
14194 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14195 lock_groups.update(instance_groups)
14197 # No target groups, need to lock all of them
14198 lock_groups = locking.ALL_SET
14200 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14202 elif level == locking.LEVEL_NODE:
14203 if self.req_target_uuids:
14204 # Lock all nodes used by instances
14205 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14206 self._LockInstancesNodes()
14208 # Lock all nodes in all potential target groups
14209 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14210 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14211 member_nodes = [node_name
14212 for group in lock_groups
14213 for node_name in self.cfg.GetNodeGroup(group).members]
14214 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14216 # Lock all nodes as all groups are potential targets
14217 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14219 def CheckPrereq(self):
14220 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14221 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14222 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14224 assert (self.req_target_uuids is None or
14225 owned_groups.issuperset(self.req_target_uuids))
14226 assert owned_instances == set([self.op.instance_name])
14228 # Get instance information
14229 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14231 # Check if node groups for locked instance are still correct
14232 assert owned_nodes.issuperset(self.instance.all_nodes), \
14233 ("Instance %s's nodes changed while we kept the lock" %
14234 self.op.instance_name)
14236 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14239 if self.req_target_uuids:
14240 # User requested specific target groups
14241 self.target_uuids = frozenset(self.req_target_uuids)
14243 # All groups except those used by the instance are potential targets
14244 self.target_uuids = owned_groups - inst_groups
14246 conflicting_groups = self.target_uuids & inst_groups
14247 if conflicting_groups:
14248 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14249 " used by the instance '%s'" %
14250 (utils.CommaJoin(conflicting_groups),
14251 self.op.instance_name),
14252 errors.ECODE_INVAL)
14254 if not self.target_uuids:
14255 raise errors.OpPrereqError("There are no possible target groups",
14256 errors.ECODE_INVAL)
14258 def BuildHooksEnv(self):
14259 """Build hooks env.
14262 assert self.target_uuids
14265 "TARGET_GROUPS": " ".join(self.target_uuids),
14268 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14272 def BuildHooksNodes(self):
14273 """Build hooks nodes.
14276 mn = self.cfg.GetMasterNode()
14277 return ([mn], [mn])
14279 def Exec(self, feedback_fn):
14280 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14282 assert instances == [self.op.instance_name], "Instance not locked"
14284 req = iallocator.IAReqGroupChange(instances=instances,
14285 target_groups=list(self.target_uuids))
14286 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14288 ial.Run(self.op.iallocator)
14290 if not ial.success:
14291 raise errors.OpPrereqError("Can't compute solution for changing group of"
14292 " instance '%s' using iallocator '%s': %s" %
14293 (self.op.instance_name, self.op.iallocator,
14294 ial.info), errors.ECODE_NORES)
14296 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14298 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14299 " instance '%s'", len(jobs), self.op.instance_name)
14301 return ResultWithJobs(jobs)
14304 class LUBackupQuery(NoHooksLU):
14305 """Query the exports list
14310 def CheckArguments(self):
14311 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14312 ["node", "export"], self.op.use_locking)
14314 def ExpandNames(self):
14315 self.expq.ExpandNames(self)
14317 def DeclareLocks(self, level):
14318 self.expq.DeclareLocks(self, level)
14320 def Exec(self, feedback_fn):
14323 for (node, expname) in self.expq.OldStyleQuery(self):
14324 if expname is None:
14325 result[node] = False
14327 result.setdefault(node, []).append(expname)
14332 class _ExportQuery(_QueryBase):
14333 FIELDS = query.EXPORT_FIELDS
14335 #: The node name is not a unique key for this query
14336 SORT_FIELD = "node"
14338 def ExpandNames(self, lu):
14339 lu.needed_locks = {}
14341 # The following variables interact with _QueryBase._GetNames
14343 self.wanted = _GetWantedNodes(lu, self.names)
14345 self.wanted = locking.ALL_SET
14347 self.do_locking = self.use_locking
14349 if self.do_locking:
14350 lu.share_locks = _ShareAll()
14351 lu.needed_locks = {
14352 locking.LEVEL_NODE: self.wanted,
14356 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14358 def DeclareLocks(self, lu, level):
14361 def _GetQueryData(self, lu):
14362 """Computes the list of nodes and their attributes.
14365 # Locking is not used
14367 assert not (compat.any(lu.glm.is_owned(level)
14368 for level in locking.LEVELS
14369 if level != locking.LEVEL_CLUSTER) or
14370 self.do_locking or self.use_locking)
14372 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14376 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14378 result.append((node, None))
14380 result.extend((node, expname) for expname in nres.payload)
14385 class LUBackupPrepare(NoHooksLU):
14386 """Prepares an instance for an export and returns useful information.
14391 def ExpandNames(self):
14392 self._ExpandAndLockInstance()
14394 def CheckPrereq(self):
14395 """Check prerequisites.
14398 instance_name = self.op.instance_name
14400 self.instance = self.cfg.GetInstanceInfo(instance_name)
14401 assert self.instance is not None, \
14402 "Cannot retrieve locked instance %s" % self.op.instance_name
14403 _CheckNodeOnline(self, self.instance.primary_node)
14405 self._cds = _GetClusterDomainSecret()
14407 def Exec(self, feedback_fn):
14408 """Prepares an instance for an export.
14411 instance = self.instance
14413 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14414 salt = utils.GenerateSecret(8)
14416 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14417 result = self.rpc.call_x509_cert_create(instance.primary_node,
14418 constants.RIE_CERT_VALIDITY)
14419 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14421 (name, cert_pem) = result.payload
14423 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14427 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14428 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14430 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14436 class LUBackupExport(LogicalUnit):
14437 """Export an instance to an image in the cluster.
14440 HPATH = "instance-export"
14441 HTYPE = constants.HTYPE_INSTANCE
14444 def CheckArguments(self):
14445 """Check the arguments.
14448 self.x509_key_name = self.op.x509_key_name
14449 self.dest_x509_ca_pem = self.op.destination_x509_ca
14451 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14452 if not self.x509_key_name:
14453 raise errors.OpPrereqError("Missing X509 key name for encryption",
14454 errors.ECODE_INVAL)
14456 if not self.dest_x509_ca_pem:
14457 raise errors.OpPrereqError("Missing destination X509 CA",
14458 errors.ECODE_INVAL)
14460 def ExpandNames(self):
14461 self._ExpandAndLockInstance()
14463 # Lock all nodes for local exports
14464 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14465 # FIXME: lock only instance primary and destination node
14467 # Sad but true, for now we have to lock all nodes, as we don't know where
14468 # the previous export might be, and in this LU we search for it and
14469 # remove it from its current node. In the future we could fix this by:
14470 # - making a tasklet to search (share-lock all), then create the
14471 # new one, then one to remove, after
14472 # - removing the removal operation altogether
14473 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14475 # Allocations should be stopped while this LU runs with node locks, but
14476 # it doesn't have to be exclusive
14477 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14478 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14480 def DeclareLocks(self, level):
14481 """Last minute lock declaration."""
14482 # All nodes are locked anyway, so nothing to do here.
14484 def BuildHooksEnv(self):
14485 """Build hooks env.
14487 This will run on the master, primary node and target node.
14491 "EXPORT_MODE": self.op.mode,
14492 "EXPORT_NODE": self.op.target_node,
14493 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14494 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14495 # TODO: Generic function for boolean env variables
14496 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14499 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14503 def BuildHooksNodes(self):
14504 """Build hooks nodes.
14507 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14509 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14510 nl.append(self.op.target_node)
14514 def CheckPrereq(self):
14515 """Check prerequisites.
14517 This checks that the instance and node names are valid.
14520 instance_name = self.op.instance_name
14522 self.instance = self.cfg.GetInstanceInfo(instance_name)
14523 assert self.instance is not None, \
14524 "Cannot retrieve locked instance %s" % self.op.instance_name
14525 _CheckNodeOnline(self, self.instance.primary_node)
14527 if (self.op.remove_instance and
14528 self.instance.admin_state == constants.ADMINST_UP and
14529 not self.op.shutdown):
14530 raise errors.OpPrereqError("Can not remove instance without shutting it"
14531 " down before", errors.ECODE_STATE)
14533 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14534 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14535 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14536 assert self.dst_node is not None
14538 _CheckNodeOnline(self, self.dst_node.name)
14539 _CheckNodeNotDrained(self, self.dst_node.name)
14542 self.dest_disk_info = None
14543 self.dest_x509_ca = None
14545 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14546 self.dst_node = None
14548 if len(self.op.target_node) != len(self.instance.disks):
14549 raise errors.OpPrereqError(("Received destination information for %s"
14550 " disks, but instance %s has %s disks") %
14551 (len(self.op.target_node), instance_name,
14552 len(self.instance.disks)),
14553 errors.ECODE_INVAL)
14555 cds = _GetClusterDomainSecret()
14557 # Check X509 key name
14559 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14560 except (TypeError, ValueError), err:
14561 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14562 errors.ECODE_INVAL)
14564 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14565 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14566 errors.ECODE_INVAL)
14568 # Load and verify CA
14570 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14571 except OpenSSL.crypto.Error, err:
14572 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14573 (err, ), errors.ECODE_INVAL)
14575 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14576 if errcode is not None:
14577 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14578 (msg, ), errors.ECODE_INVAL)
14580 self.dest_x509_ca = cert
14582 # Verify target information
14584 for idx, disk_data in enumerate(self.op.target_node):
14586 (host, port, magic) = \
14587 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14588 except errors.GenericError, err:
14589 raise errors.OpPrereqError("Target info for disk %s: %s" %
14590 (idx, err), errors.ECODE_INVAL)
14592 disk_info.append((host, port, magic))
14594 assert len(disk_info) == len(self.op.target_node)
14595 self.dest_disk_info = disk_info
14598 raise errors.ProgrammerError("Unhandled export mode %r" %
14601 # instance disk type verification
14602 # TODO: Implement export support for file-based disks
14603 for disk in self.instance.disks:
14604 if disk.dev_type == constants.LD_FILE:
14605 raise errors.OpPrereqError("Export not supported for instances with"
14606 " file-based disks", errors.ECODE_INVAL)
14608 def _CleanupExports(self, feedback_fn):
14609 """Removes exports of current instance from all other nodes.
14611 If an instance in a cluster with nodes A..D was exported to node C, its
14612 exports will be removed from the nodes A, B and D.
14615 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14617 nodelist = self.cfg.GetNodeList()
14618 nodelist.remove(self.dst_node.name)
14620 # on one-node clusters nodelist will be empty after the removal
14621 # if we proceed, the backup would be removed because OpBackupQuery
14622 # substitutes an empty list with the full cluster node list.
14623 iname = self.instance.name
14625 feedback_fn("Removing old exports for instance %s" % iname)
14626 exportlist = self.rpc.call_export_list(nodelist)
14627 for node in exportlist:
14628 if exportlist[node].fail_msg:
14630 if iname in exportlist[node].payload:
14631 msg = self.rpc.call_export_remove(node, iname).fail_msg
14633 self.LogWarning("Could not remove older export for instance %s"
14634 " on node %s: %s", iname, node, msg)
14636 def Exec(self, feedback_fn):
14637 """Export an instance to an image in the cluster.
14640 assert self.op.mode in constants.EXPORT_MODES
14642 instance = self.instance
14643 src_node = instance.primary_node
14645 if self.op.shutdown:
14646 # shutdown the instance, but not the disks
14647 feedback_fn("Shutting down instance %s" % instance.name)
14648 result = self.rpc.call_instance_shutdown(src_node, instance,
14649 self.op.shutdown_timeout)
14650 # TODO: Maybe ignore failures if ignore_remove_failures is set
14651 result.Raise("Could not shutdown instance %s on"
14652 " node %s" % (instance.name, src_node))
14654 # set the disks ID correctly since call_instance_start needs the
14655 # correct drbd minor to create the symlinks
14656 for disk in instance.disks:
14657 self.cfg.SetDiskID(disk, src_node)
14659 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14662 # Activate the instance disks if we're exporting a stopped instance
14663 feedback_fn("Activating disks for %s" % instance.name)
14664 _StartInstanceDisks(self, instance, None)
14667 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14670 helper.CreateSnapshots()
14672 if (self.op.shutdown and
14673 instance.admin_state == constants.ADMINST_UP and
14674 not self.op.remove_instance):
14675 assert not activate_disks
14676 feedback_fn("Starting instance %s" % instance.name)
14677 result = self.rpc.call_instance_start(src_node,
14678 (instance, None, None), False)
14679 msg = result.fail_msg
14681 feedback_fn("Failed to start instance: %s" % msg)
14682 _ShutdownInstanceDisks(self, instance)
14683 raise errors.OpExecError("Could not start instance: %s" % msg)
14685 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14686 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14687 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14688 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14689 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14691 (key_name, _, _) = self.x509_key_name
14694 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14697 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14698 key_name, dest_ca_pem,
14703 # Check for backwards compatibility
14704 assert len(dresults) == len(instance.disks)
14705 assert compat.all(isinstance(i, bool) for i in dresults), \
14706 "Not all results are boolean: %r" % dresults
14710 feedback_fn("Deactivating disks for %s" % instance.name)
14711 _ShutdownInstanceDisks(self, instance)
14713 if not (compat.all(dresults) and fin_resu):
14716 failures.append("export finalization")
14717 if not compat.all(dresults):
14718 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14720 failures.append("disk export: disk(s) %s" % fdsk)
14722 raise errors.OpExecError("Export failed, errors in %s" %
14723 utils.CommaJoin(failures))
14725 # At this point, the export was successful, we can cleanup/finish
14727 # Remove instance if requested
14728 if self.op.remove_instance:
14729 feedback_fn("Removing instance %s" % instance.name)
14730 _RemoveInstance(self, feedback_fn, instance,
14731 self.op.ignore_remove_failures)
14733 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14734 self._CleanupExports(feedback_fn)
14736 return fin_resu, dresults
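# The remote-export prerequisite check above loads the destination X509 CA and
# rejects certificates it cannot use. The sketch below shows the same kind of
# check with plain pyOpenSSL (relying on the module-level OpenSSL import, as
# the code above does); it illustrates the idea only and checks less than
# utils.VerifyX509Certificate actually does.
def _ExampleCheckCaPem(pem_data):
  """Returns an error message for an unusable CA, or None if it looks valid."""
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           pem_data)
  except OpenSSL.crypto.Error, err:
    return "Cannot load certificate: %s" % err
  if cert.has_expired():
    return "Certificate has expired"
  return None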
14739 class LUBackupRemove(NoHooksLU):
14740 """Remove exports related to the named instance.
14745 def ExpandNames(self):
14746 self.needed_locks = {
14747 # We need all nodes to be locked in order for RemoveExport to work, but
14748 # we don't need to lock the instance itself, as nothing will happen to it
14749 # (and we can also remove exports for a removed instance)
14750 locking.LEVEL_NODE: locking.ALL_SET,
14752 # Removing backups is quick, so blocking allocations is justified
14753 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14756 # Allocations should be stopped while this LU runs with node locks, but it
14757 # doesn't have to be exclusive
14758 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14760 def Exec(self, feedback_fn):
14761 """Remove any export.
14764 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14765 # If the instance was not found we'll try with the name that was passed in.
14766 # This will only work if it was an FQDN, though.
14768 if not instance_name:
14770 instance_name = self.op.instance_name
14772 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14773 exportlist = self.rpc.call_export_list(locked_nodes)
14775 for node in exportlist:
14776 msg = exportlist[node].fail_msg
14778 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14780 if instance_name in exportlist[node].payload:
14782 result = self.rpc.call_export_remove(node, instance_name)
14783 msg = result.fail_msg
14785 logging.error("Could not remove export for instance %s"
14786 " on node %s: %s", instance_name, node, msg)
14788 if fqdn_warn and not found:
14789 feedback_fn("Export not found. If trying to remove an export belonging"
14790 " to a deleted instance please use its Fully Qualified"
14794 class LUGroupAdd(LogicalUnit):
14795 """Logical unit for creating node groups.
14798 HPATH = "group-add"
14799 HTYPE = constants.HTYPE_GROUP
14802 def ExpandNames(self):
14803 # We need the new group's UUID here so that we can create and acquire the
14804 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14805 # that it should not check whether the UUID exists in the configuration.
14806 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14807 self.needed_locks = {}
14808 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14810 def CheckPrereq(self):
14811 """Check prerequisites.
14813 This checks that the given group name is not an existing node group
14818 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14819 except errors.OpPrereqError:
14822 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14823 " node group (UUID: %s)" %
14824 (self.op.group_name, existing_uuid),
14825 errors.ECODE_EXISTS)
14827 if self.op.ndparams:
14828 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14830 if self.op.hv_state:
14831 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14833 self.new_hv_state = None
14835 if self.op.disk_state:
14836 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14838 self.new_disk_state = None
14840 if self.op.diskparams:
14841 for templ in constants.DISK_TEMPLATES:
14842 if templ in self.op.diskparams:
14843 utils.ForceDictType(self.op.diskparams[templ],
14844 constants.DISK_DT_TYPES)
14845 self.new_diskparams = self.op.diskparams
14847 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14848 except errors.OpPrereqError, err:
14849 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14850 errors.ECODE_INVAL)
14852 self.new_diskparams = {}
14854 if self.op.ipolicy:
14855 cluster = self.cfg.GetClusterInfo()
14856 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14858 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14859 except errors.ConfigurationError, err:
14860 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14861 errors.ECODE_INVAL)
14863 def BuildHooksEnv(self):
14864 """Build hooks env.
14868 "GROUP_NAME": self.op.group_name,
14871 def BuildHooksNodes(self):
14872 """Build hooks nodes.
14875 mn = self.cfg.GetMasterNode()
14876 return ([mn], [mn])
14878 def Exec(self, feedback_fn):
14879 """Add the node group to the cluster.
14882 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14883 uuid=self.group_uuid,
14884 alloc_policy=self.op.alloc_policy,
14885 ndparams=self.op.ndparams,
14886 diskparams=self.new_diskparams,
14887 ipolicy=self.op.ipolicy,
14888 hv_state_static=self.new_hv_state,
14889 disk_state_static=self.new_disk_state)
14891 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14892 del self.remove_locks[locking.LEVEL_NODEGROUP]
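# LUGroupAdd validates per-template disk parameters in two steps: value types
# are forced per template and unknown option names are rejected against the
# known defaults. A simplified, plain-Python sketch of the second step (not
# the utils implementation used above):
def _ExampleUnknownDiskParams(diskparams, defaults):
  """Returns "template/option" strings present in diskparams but not known.

  @param diskparams: dict of template name to dict of options
  @param defaults: dict of template name to dict of default options

  """
  unknown = []
  for templ, options in diskparams.items():
    valid = defaults.get(templ, {})
    for name in options:
      if name not in valid:
        unknown.append("%s/%s" % (templ, name))
  return unknown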
14895 class LUGroupAssignNodes(NoHooksLU):
14896 """Logical unit for assigning nodes to groups.
14901 def ExpandNames(self):
14902 # These raise errors.OpPrereqError on their own:
14903 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14904 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14906 # We want to lock all the affected nodes and groups. We have readily
14907 # available the list of nodes, and the *destination* group. To gather the
14908 # list of "source" groups, we need to fetch node information later on.
14909 self.needed_locks = {
14910 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14911 locking.LEVEL_NODE: self.op.nodes,
14914 def DeclareLocks(self, level):
14915 if level == locking.LEVEL_NODEGROUP:
14916 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14918 # Try to get all affected nodes' groups without having the group or node
14919 # lock yet. Needs verification later in the code flow.
14920 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14922 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14924 def CheckPrereq(self):
14925 """Check prerequisites.
14928 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14929 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14930 frozenset(self.op.nodes))
14932 expected_locks = (set([self.group_uuid]) |
14933 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14934 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14935 if actual_locks != expected_locks:
14936 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14937 " current groups are '%s', used to be '%s'" %
14938 (utils.CommaJoin(expected_locks),
14939 utils.CommaJoin(actual_locks)))
14941 self.node_data = self.cfg.GetAllNodesInfo()
14942 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14943 instance_data = self.cfg.GetAllInstancesInfo()
14945 if self.group is None:
14946 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14947 (self.op.group_name, self.group_uuid))
14949 (new_splits, previous_splits) = \
14950 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14951 for node in self.op.nodes],
14952 self.node_data, instance_data)
14955 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14957 if not self.op.force:
14958 raise errors.OpExecError("The following instances get split by this"
14959 " change and --force was not given: %s" %
14962 self.LogWarning("This operation will split the following instances: %s",
14965 if previous_splits:
14966 self.LogWarning("In addition, these already-split instances continue"
14967 " to be split across groups: %s",
14968 utils.CommaJoin(utils.NiceSort(previous_splits)))
14970 def Exec(self, feedback_fn):
14971 """Assign nodes to a new group.
14974 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14976 self.cfg.AssignGroupNodes(mods)
14979 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14980 """Check for split instances after a node assignment.
14982 This method considers a series of node assignments as an atomic operation,
14983 and returns information about split instances after applying the set of
14986 In particular, it returns information about newly split instances, and
14987 instances that were already split, and remain so after the change.
14989 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14992 @type changes: list of (node_name, new_group_uuid) pairs.
14993 @param changes: list of node assignments to consider.
14994 @param node_data: a dict with data for all nodes
14995 @param instance_data: a dict with all instances to consider
14996 @rtype: a two-tuple
14997 @return: a list of instances that were previously okay and end up split as a
14998 consequence of this change, and a list of instances that were previously
14999 split and that this change does not fix.
15002 changed_nodes = dict((node, group) for node, group in changes
15003 if node_data[node].group != group)
15005 all_split_instances = set()
15006 previously_split_instances = set()
15008 def InstanceNodes(instance):
15009 return [instance.primary_node] + list(instance.secondary_nodes)
15011 for inst in instance_data.values():
15012 if inst.disk_template not in constants.DTS_INT_MIRROR:
15015 instance_nodes = InstanceNodes(inst)
15017 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15018 previously_split_instances.add(inst.name)
15020 if len(set(changed_nodes.get(node, node_data[node].group)
15021 for node in instance_nodes)) > 1:
15022 all_split_instances.add(inst.name)
15024 return (list(all_split_instances - previously_split_instances),
15025 list(previously_split_instances & all_split_instances))
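# Worked example of the split check above, using plain dictionaries instead of
# configuration objects (all names and groups below are made up): moving node2
# from group "g1" to "g2" splits a mirrored instance whose nodes are node1
# (staying in "g1") and node2.
def _ExampleSplitCheck():
  """Toy reimplementation of the split detection on hard-coded data."""
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  instance_nodes = {"inst1": ["node1", "node2"], "inst2": ["node3"]}
  changes = {"node2": "g2"}

  new_groups = node_group.copy()
  new_groups.update(changes)

  split = []
  for iname, nodes in instance_nodes.items():
    if len(set(new_groups[node] for node in nodes)) > 1:
      split.append(iname)
  return split  # ["inst1"]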
15028 class _GroupQuery(_QueryBase):
15029 FIELDS = query.GROUP_FIELDS
15031 def ExpandNames(self, lu):
15032 lu.needed_locks = {}
15034 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15035 self._cluster = lu.cfg.GetClusterInfo()
15036 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15039 self.wanted = [name_to_uuid[name]
15040 for name in utils.NiceSort(name_to_uuid.keys())]
15042 # Accept names to be either names or UUIDs.
15045 all_uuid = frozenset(self._all_groups.keys())
15047 for name in self.names:
15048 if name in all_uuid:
15049 self.wanted.append(name)
15050 elif name in name_to_uuid:
15051 self.wanted.append(name_to_uuid[name])
15053 missing.append(name)
15056 raise errors.OpPrereqError("Some groups do not exist: %s" %
15057 utils.CommaJoin(missing),
15058 errors.ECODE_NOENT)
15060 def DeclareLocks(self, lu, level):
15063 def _GetQueryData(self, lu):
15064 """Computes the list of node groups and their attributes.
15067 do_nodes = query.GQ_NODE in self.requested_data
15068 do_instances = query.GQ_INST in self.requested_data
15070 group_to_nodes = None
15071 group_to_instances = None
15073 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15074 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15075 # latter GetAllInstancesInfo() is not enough, for we have to go through
15076 # instance->node. Hence, we will need to process nodes even if we only need
15077 # instance information.
15078 if do_nodes or do_instances:
15079 all_nodes = lu.cfg.GetAllNodesInfo()
15080 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15083 for node in all_nodes.values():
15084 if node.group in group_to_nodes:
15085 group_to_nodes[node.group].append(node.name)
15086 node_to_group[node.name] = node.group
15089 all_instances = lu.cfg.GetAllInstancesInfo()
15090 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15092 for instance in all_instances.values():
15093 node = instance.primary_node
15094 if node in node_to_group:
15095 group_to_instances[node_to_group[node]].append(instance.name)
15098 # Do not pass on node information if it was not requested.
15099 group_to_nodes = None
15101 return query.GroupQueryData(self._cluster,
15102 [self._all_groups[uuid]
15103 for uuid in self.wanted],
15104 group_to_nodes, group_to_instances,
15105 query.GQ_DISKPARAMS in self.requested_data)
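# _GroupQuery.ExpandNames above accepts both group names and UUIDs. The core
# of that resolution is a two-way lookup; a standalone sketch over made-up
# data structures:
def _ExampleResolveGroups(names, name_to_uuid):
  """Maps a mixed list of names/UUIDs to UUIDs, returning (wanted, missing)."""
  all_uuids = frozenset(name_to_uuid.values())
  wanted = []
  missing = []
  for name in names:
    if name in all_uuids:
      wanted.append(name)
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)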
15108 class LUGroupQuery(NoHooksLU):
15109 """Logical unit for querying node groups.
15114 def CheckArguments(self):
15115 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15116 self.op.output_fields, False)
15118 def ExpandNames(self):
15119 self.gq.ExpandNames(self)
15121 def DeclareLocks(self, level):
15122 self.gq.DeclareLocks(self, level)
15124 def Exec(self, feedback_fn):
15125 return self.gq.OldStyleQuery(self)
15128 class LUGroupSetParams(LogicalUnit):
15129 """Modifies the parameters of a node group.
15132 HPATH = "group-modify"
15133 HTYPE = constants.HTYPE_GROUP
15136 def CheckArguments(self):
15139 self.op.diskparams,
15140 self.op.alloc_policy,
15142 self.op.disk_state,
15146 if all_changes.count(None) == len(all_changes):
15147 raise errors.OpPrereqError("Please pass at least one modification",
15148 errors.ECODE_INVAL)
15150 def ExpandNames(self):
15151 # This raises errors.OpPrereqError on its own:
15152 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15154 self.needed_locks = {
15155 locking.LEVEL_INSTANCE: [],
15156 locking.LEVEL_NODEGROUP: [self.group_uuid],
15159 self.share_locks[locking.LEVEL_INSTANCE] = 1
15161 def DeclareLocks(self, level):
15162 if level == locking.LEVEL_INSTANCE:
15163 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15165 # Lock instances optimistically, needs verification once group lock has
15167 self.needed_locks[locking.LEVEL_INSTANCE] = \
15168 self.cfg.GetNodeGroupInstances(self.group_uuid)
15171 def _UpdateAndVerifyDiskParams(old, new):
15172 """Updates and verifies disk parameters.
15175 new_params = _GetUpdatedParams(old, new)
15176 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15179 def CheckPrereq(self):
15180 """Check prerequisites.
15183 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15185 # Check if locked instances are still correct
15186 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15188 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15189 cluster = self.cfg.GetClusterInfo()
15191 if self.group is None:
15192 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15193 (self.op.group_name, self.group_uuid))
15195 if self.op.ndparams:
15196 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15197 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15198 self.new_ndparams = new_ndparams
15200 if self.op.diskparams:
15201 diskparams = self.group.diskparams
15202 uavdp = self._UpdateAndVerifyDiskParams
15203 # For each disk template subdict, update and verify the values
15204 new_diskparams = dict((dt,
15205 uavdp(diskparams.get(dt, {}),
15206 self.op.diskparams[dt]))
15207 for dt in constants.DISK_TEMPLATES
15208 if dt in self.op.diskparams)
15209 # As we have all subdicts of diskparams ready, let's merge the actual
15210 # dict with all the updated subdicts
15211 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15213 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15214 except errors.OpPrereqError, err:
15215 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15216 errors.ECODE_INVAL)
15218 if self.op.hv_state:
15219 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15220 self.group.hv_state_static)
15222 if self.op.disk_state:
15223 self.new_disk_state = \
15224 _MergeAndVerifyDiskState(self.op.disk_state,
15225 self.group.disk_state_static)
15227 if self.op.ipolicy:
15228 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15232 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15233 inst_filter = lambda inst: inst.name in owned_instances
15234 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15235 gmi = ganeti.masterd.instance
15237 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15239 new_ipolicy, instances)
15242 self.LogWarning("After the ipolicy change the following instances"
15243 " violate them: %s",
15244 utils.CommaJoin(violations))
15246 def BuildHooksEnv(self):
15247 """Build hooks env.
15251 "GROUP_NAME": self.op.group_name,
15252 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15255 def BuildHooksNodes(self):
15256 """Build hooks nodes.
15259 mn = self.cfg.GetMasterNode()
15260 return ([mn], [mn])
15262 def Exec(self, feedback_fn):
15263 """Modifies the node group.
15268 if self.op.ndparams:
15269 self.group.ndparams = self.new_ndparams
15270 result.append(("ndparams", str(self.group.ndparams)))
15272 if self.op.diskparams:
15273 self.group.diskparams = self.new_diskparams
15274 result.append(("diskparams", str(self.group.diskparams)))
15276 if self.op.alloc_policy:
15277 self.group.alloc_policy = self.op.alloc_policy
15279 if self.op.hv_state:
15280 self.group.hv_state_static = self.new_hv_state
15282 if self.op.disk_state:
15283 self.group.disk_state_static = self.new_disk_state
15285 if self.op.ipolicy:
15286 self.group.ipolicy = self.new_ipolicy
15288 self.cfg.Update(self.group, feedback_fn)
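# LUGroupSetParams merges the per-template overrides from the opcode onto the
# group's existing disk parameters before validating the result. A simplified
# overlay is sketched below; the real _GetUpdatedParams/objects.FillDict
# helpers have additional semantics not reproduced here.
def _ExampleOverlayDiskParams(old, new):
  """Returns a copy of old with its per-template subdicts updated from new."""
  merged = dict((templ, dict(options)) for (templ, options) in old.items())
  for templ, options in new.items():
    merged.setdefault(templ, {}).update(options)
  return merged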
15292 class LUGroupRemove(LogicalUnit):
15293 HPATH = "group-remove"
15294 HTYPE = constants.HTYPE_GROUP
15297 def ExpandNames(self):
15298 # This will raise errors.OpPrereqError on its own:
15299 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15300 self.needed_locks = {
15301 locking.LEVEL_NODEGROUP: [self.group_uuid],
15304 def CheckPrereq(self):
15305 """Check prerequisites.
15307 This checks that the given group name exists as a node group, that it is
15308 empty (i.e., contains no nodes), and that it is not the last group of the
15312 # Verify that the group is empty.
15313 group_nodes = [node.name
15314 for node in self.cfg.GetAllNodesInfo().values()
15315 if node.group == self.group_uuid]
15318 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15320 (self.op.group_name,
15321 utils.CommaJoin(utils.NiceSort(group_nodes))),
15322 errors.ECODE_STATE)
15324 # Verify the cluster would not be left group-less.
15325 if len(self.cfg.GetNodeGroupList()) == 1:
15326 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15327 " removed" % self.op.group_name,
15328 errors.ECODE_STATE)
15330 def BuildHooksEnv(self):
15331 """Build hooks env.
15335 "GROUP_NAME": self.op.group_name,
15338 def BuildHooksNodes(self):
15339 """Build hooks nodes.
15342 mn = self.cfg.GetMasterNode()
15343 return ([mn], [mn])
15345 def Exec(self, feedback_fn):
15346 """Remove the node group.
15350 self.cfg.RemoveNodeGroup(self.group_uuid)
15351 except errors.ConfigurationError:
15352 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15353 (self.op.group_name, self.group_uuid))
15355 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15358 class LUGroupRename(LogicalUnit):
15359 HPATH = "group-rename"
15360 HTYPE = constants.HTYPE_GROUP
15363 def ExpandNames(self):
15364 # This raises errors.OpPrereqError on its own:
15365 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15367 self.needed_locks = {
15368 locking.LEVEL_NODEGROUP: [self.group_uuid],
15371 def CheckPrereq(self):
15372 """Check prerequisites.
15374 Ensures requested new name is not yet used.
15378 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15379 except errors.OpPrereqError:
15382 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15383 " node group (UUID: %s)" %
15384 (self.op.new_name, new_name_uuid),
15385 errors.ECODE_EXISTS)
15387 def BuildHooksEnv(self):
15388 """Build hooks env.
15392 "OLD_NAME": self.op.group_name,
15393 "NEW_NAME": self.op.new_name,
15396 def BuildHooksNodes(self):
15397 """Build hooks nodes.
15400 mn = self.cfg.GetMasterNode()
15402 all_nodes = self.cfg.GetAllNodesInfo()
15403 all_nodes.pop(mn, None)
15406 run_nodes.extend(node.name for node in all_nodes.values()
15407 if node.group == self.group_uuid)
15409 return (run_nodes, run_nodes)
15411 def Exec(self, feedback_fn):
15412 """Rename the node group.
15415 group = self.cfg.GetNodeGroup(self.group_uuid)
15418 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15419 (self.op.group_name, self.group_uuid))
15421 group.name = self.op.new_name
15422 self.cfg.Update(group, feedback_fn)
15424 return self.op.new_name
15427 class LUGroupEvacuate(LogicalUnit):
15428 HPATH = "group-evacuate"
15429 HTYPE = constants.HTYPE_GROUP
15432 def ExpandNames(self):
15433 # This raises errors.OpPrereqError on its own:
15434 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15436 if self.op.target_groups:
15437 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15438 self.op.target_groups)
15440 self.req_target_uuids = []
15442 if self.group_uuid in self.req_target_uuids:
15443 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15444 " as a target group (targets are %s)" %
15446 utils.CommaJoin(self.req_target_uuids)),
15447 errors.ECODE_INVAL)
15449 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15451 self.share_locks = _ShareAll()
15452 self.needed_locks = {
15453 locking.LEVEL_INSTANCE: [],
15454 locking.LEVEL_NODEGROUP: [],
15455 locking.LEVEL_NODE: [],
15458 def DeclareLocks(self, level):
15459 if level == locking.LEVEL_INSTANCE:
15460 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15462 # Lock instances optimistically, needs verification once node and group
15463 # locks have been acquired
15464 self.needed_locks[locking.LEVEL_INSTANCE] = \
15465 self.cfg.GetNodeGroupInstances(self.group_uuid)
15467 elif level == locking.LEVEL_NODEGROUP:
15468 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15470 if self.req_target_uuids:
15471 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15473 # Lock all groups used by instances optimistically; this requires going
15474 # via the node before it's locked, requiring verification later on
15475 lock_groups.update(group_uuid
15476 for instance_name in
15477 self.owned_locks(locking.LEVEL_INSTANCE)
15479 self.cfg.GetInstanceNodeGroups(instance_name))
15481 # No target groups, need to lock all of them
15482 lock_groups = locking.ALL_SET
15484 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15486 elif level == locking.LEVEL_NODE:
15487 # This will only lock the nodes in the group to be evacuated which
15488 # contain actual instances
15489 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15490 self._LockInstancesNodes()
15492 # Lock all nodes in group to be evacuated and target groups
15493 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15494 assert self.group_uuid in owned_groups
15495 member_nodes = [node_name
15496 for group in owned_groups
15497 for node_name in self.cfg.GetNodeGroup(group).members]
15498 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15500 def CheckPrereq(self):
15501 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15502 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15503 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15505 assert owned_groups.issuperset(self.req_target_uuids)
15506 assert self.group_uuid in owned_groups
15508 # Check if locked instances are still correct
15509 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15511 # Get instance information
15512 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15514 # Check if node groups for locked instances are still correct
15515 _CheckInstancesNodeGroups(self.cfg, self.instances,
15516 owned_groups, owned_nodes, self.group_uuid)
15518 if self.req_target_uuids:
15519 # User requested specific target groups
15520 self.target_uuids = self.req_target_uuids
15522 # All groups except the one to be evacuated are potential targets
15523 self.target_uuids = [group_uuid for group_uuid in owned_groups
15524 if group_uuid != self.group_uuid]
15526 if not self.target_uuids:
15527 raise errors.OpPrereqError("There are no possible target groups",
15528 errors.ECODE_INVAL)
15530 def BuildHooksEnv(self):
15531 """Build hooks env.
15535 "GROUP_NAME": self.op.group_name,
15536 "TARGET_GROUPS": " ".join(self.target_uuids),
15539 def BuildHooksNodes(self):
15540 """Build hooks nodes.
15543 mn = self.cfg.GetMasterNode()
15545 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15547 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15549 return (run_nodes, run_nodes)
15551 def Exec(self, feedback_fn):
15552 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15554 assert self.group_uuid not in self.target_uuids
15556 req = iallocator.IAReqGroupChange(instances=instances,
15557 target_groups=self.target_uuids)
15558 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15560 ial.Run(self.op.iallocator)
15562 if not ial.success:
15563 raise errors.OpPrereqError("Can't compute group evacuation using"
15564 " iallocator '%s': %s" %
15565 (self.op.iallocator, ial.info),
15566 errors.ECODE_NORES)
15568 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15570 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15571 len(jobs), self.op.group_name)
15573 return ResultWithJobs(jobs)
15576 class TagsLU(NoHooksLU): # pylint: disable=W0223
15577 """Generic tags LU.
15579 This is an abstract class which is the parent of all the other tags LUs.
15582 def ExpandNames(self):
15583 self.group_uuid = None
15584 self.needed_locks = {}
15586 if self.op.kind == constants.TAG_NODE:
15587 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15588 lock_level = locking.LEVEL_NODE
15589 lock_name = self.op.name
15590 elif self.op.kind == constants.TAG_INSTANCE:
15591 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15592 lock_level = locking.LEVEL_INSTANCE
15593 lock_name = self.op.name
15594 elif self.op.kind == constants.TAG_NODEGROUP:
15595 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15596 lock_level = locking.LEVEL_NODEGROUP
15597 lock_name = self.group_uuid
15598 elif self.op.kind == constants.TAG_NETWORK:
15599 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15600 lock_level = locking.LEVEL_NETWORK
15601 lock_name = self.network_uuid
15606 if lock_level and getattr(self.op, "use_locking", True):
15607 self.needed_locks[lock_level] = lock_name
15609 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15610 # not possible to acquire the BGL based on opcode parameters)
15612 def CheckPrereq(self):
15613 """Check prerequisites.
15616 if self.op.kind == constants.TAG_CLUSTER:
15617 self.target = self.cfg.GetClusterInfo()
15618 elif self.op.kind == constants.TAG_NODE:
15619 self.target = self.cfg.GetNodeInfo(self.op.name)
15620 elif self.op.kind == constants.TAG_INSTANCE:
15621 self.target = self.cfg.GetInstanceInfo(self.op.name)
15622 elif self.op.kind == constants.TAG_NODEGROUP:
15623 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15624 elif self.op.kind == constants.TAG_NETWORK:
15625 self.target = self.cfg.GetNetwork(self.network_uuid)
15627 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15628 str(self.op.kind), errors.ECODE_INVAL)
15631 class LUTagsGet(TagsLU):
15632 """Returns the tags of a given object.
15637 def ExpandNames(self):
15638 TagsLU.ExpandNames(self)
15640 # Share locks as this is only a read operation
15641 self.share_locks = _ShareAll()
15643 def Exec(self, feedback_fn):
15644 """Returns the tag list.
15647 return list(self.target.GetTags())
15650 class LUTagsSearch(NoHooksLU):
15651 """Searches the tags for a given pattern.
15656 def ExpandNames(self):
15657 self.needed_locks = {}
15659 def CheckPrereq(self):
15660 """Check prerequisites.
15662 This checks the pattern passed for validity by compiling it.
15666 self.re = re.compile(self.op.pattern)
15667 except re.error, err:
15668 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15669 (self.op.pattern, err), errors.ECODE_INVAL)
15671 def Exec(self, feedback_fn):
15672 """Returns the tag list.
15676 tgts = [("/cluster", cfg.GetClusterInfo())]
15677 ilist = cfg.GetAllInstancesInfo().values()
15678 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15679 nlist = cfg.GetAllNodesInfo().values()
15680 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15681 tgts.extend(("/nodegroup/%s" % n.name, n)
15682 for n in cfg.GetAllNodeGroupsInfo().values())
15684 for path, target in tgts:
15685 for tag in target.GetTags():
15686 if self.re.search(tag):
15687 results.append((path, tag))
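# LUTagsSearch walks every taggable object and collects (path, tag) pairs
# whose tag matches the compiled pattern. The same matching logic, applied to
# an explicit list of (path, tag list) pairs:
def _ExampleSearchTags(pattern, tagged_objects):
  """Returns (path, tag) pairs whose tag matches the given pattern.

  @param tagged_objects: sequence of (path, list of tags) pairs

  """
  regex = re.compile(pattern)
  results = []
  for path, tags in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results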
15691 class LUTagsSet(TagsLU):
15692 """Sets a tag on a given object.
15697 def CheckPrereq(self):
15698 """Check prerequisites.
15700 This checks the type and length of the tag name and value.
15703 TagsLU.CheckPrereq(self)
15704 for tag in self.op.tags:
15705 objects.TaggableObject.ValidateTag(tag)
15707 def Exec(self, feedback_fn):
15712 for tag in self.op.tags:
15713 self.target.AddTag(tag)
15714 except errors.TagError, err:
15715 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15716 self.cfg.Update(self.target, feedback_fn)
15719 class LUTagsDel(TagsLU):
15720 """Delete a list of tags from a given object.
15725 def CheckPrereq(self):
15726 """Check prerequisites.
15728 This checks that we have the given tag.
15731 TagsLU.CheckPrereq(self)
15732 for tag in self.op.tags:
15733 objects.TaggableObject.ValidateTag(tag)
15734 del_tags = frozenset(self.op.tags)
15735 cur_tags = self.target.GetTags()
15737 diff_tags = del_tags - cur_tags
15739 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15740 raise errors.OpPrereqError("Tag(s) %s not found" %
15741 (utils.CommaJoin(diff_names), ),
15742 errors.ECODE_NOENT)
15744 def Exec(self, feedback_fn):
15745 """Remove the tag from the object.
15748 for tag in self.op.tags:
15749 self.target.RemoveTag(tag)
15750 self.cfg.Update(self.target, feedback_fn)
15753 class LUTestDelay(NoHooksLU):
15754 """Sleep for a specified amount of time.
15756 This LU sleeps on the master and/or nodes for a specified amount of
15762 def ExpandNames(self):
15763 """Expand names and set required locks.
15765 This expands the node list, if any.
15768 self.needed_locks = {}
15769 if self.op.on_nodes:
15770 # _GetWantedNodes can be used here, but is not always appropriate to use
15771 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15772 # more information.
15773 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15774 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15776 def _TestDelay(self):
15777 """Do the actual sleep.
15780 if self.op.on_master:
15781 if not utils.TestDelay(self.op.duration):
15782 raise errors.OpExecError("Error during master delay test")
15783 if self.op.on_nodes:
15784 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15785 for node, node_result in result.items():
15786 node_result.Raise("Failure during rpc call to node %s" % node)
15788 def Exec(self, feedback_fn):
15789 """Execute the test delay opcode, with the wanted repetitions.
15792 if self.op.repeat == 0:
15795 top_value = self.op.repeat - 1
15796 for i in range(self.op.repeat):
15797 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15801 class LURestrictedCommand(NoHooksLU):
15802 """Logical unit for executing restricted commands.
15807 def ExpandNames(self):
15809 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15811 self.needed_locks = {
15812 locking.LEVEL_NODE: self.op.nodes,
15814 self.share_locks = {
15815 locking.LEVEL_NODE: not self.op.use_locking,
15818 def CheckPrereq(self):
15819 """Check prerequisites.
15823 def Exec(self, feedback_fn):
15824 """Execute restricted command and return output.
15827 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15829 # Check if correct locks are held
15830 assert set(self.op.nodes).issubset(owned_nodes)
15832 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15836 for node_name in self.op.nodes:
15837 nres = rpcres[node_name]
15839 msg = ("Command '%s' on node '%s' failed: %s" %
15840 (self.op.command, node_name, nres.fail_msg))
15841 result.append((False, msg))
15843 result.append((True, nres.payload))
15848 class LUTestJqueue(NoHooksLU):
15849 """Utility LU to test some aspects of the job queue.
15854 # Must be lower than default timeout for WaitForJobChange to see whether it
15855 # notices changed jobs
15856 _CLIENT_CONNECT_TIMEOUT = 20.0
15857 _CLIENT_CONFIRM_TIMEOUT = 60.0
15860 def _NotifyUsingSocket(cls, cb, errcls):
15861 """Opens a Unix socket and waits for another program to connect.
15864 @param cb: Callback to send socket name to client
15865 @type errcls: class
15866 @param errcls: Exception class to use for errors
15869 # Using a temporary directory as there's no easy way to create temporary
15870 # sockets without writing a custom loop around tempfile.mktemp and
15872 tmpdir = tempfile.mkdtemp()
15874 tmpsock = utils.PathJoin(tmpdir, "sock")
15876 logging.debug("Creating temporary socket at %s", tmpsock)
15877 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15882 # Send details to client
15885 # Wait for client to connect before continuing
15886 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15888 (conn, _) = sock.accept()
15889 except socket.error, err:
15890 raise errcls("Client didn't connect in time (%s)" % err)
15894 # Remove as soon as client is connected
15895 shutil.rmtree(tmpdir)
15897 # Wait for client to close
15900 # pylint: disable=E1101
15901 # Instance of '_socketobject' has no ... member
15902 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15904 except socket.error, err:
15905 raise errcls("Client failed to confirm notification (%s)" % err)
15909 def _SendNotification(self, test, arg, sockname):
15910 """Sends a notification to the client.
15913 @param test: Test name
15914 @param arg: Test argument (depends on test)
15915 @type sockname: string
15916 @param sockname: Socket path
15919 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15921 def _Notify(self, prereq, test, arg):
15922 """Notifies the client of a test.
15925 @param prereq: Whether this is a prereq-phase test
15927 @param test: Test name
15928 @param arg: Test argument (depends on test)
15932 errcls = errors.OpPrereqError
15934 errcls = errors.OpExecError
15936 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15940 def CheckArguments(self):
15941 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15942 self.expandnames_calls = 0
15944 def ExpandNames(self):
15945 checkargs_calls = getattr(self, "checkargs_calls", 0)
15946 if checkargs_calls < 1:
15947 raise errors.ProgrammerError("CheckArguments was not called")
15949 self.expandnames_calls += 1
15951 if self.op.notify_waitlock:
15952 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15954 self.LogInfo("Expanding names")
15956 # Get lock on master node (just to get a lock, not for a particular reason)
15957 self.needed_locks = {
15958 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15961 def Exec(self, feedback_fn):
15962 if self.expandnames_calls < 1:
15963 raise errors.ProgrammerError("ExpandNames was not called")
15965 if self.op.notify_exec:
15966 self._Notify(False, constants.JQT_EXEC, None)
15968 self.LogInfo("Executing")
15970 if self.op.log_messages:
15971 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15972 for idx, msg in enumerate(self.op.log_messages):
15973 self.LogInfo("Sending log message %s", idx + 1)
15974 feedback_fn(constants.JQT_MSGPREFIX + msg)
15975 # Report how many test messages have been sent
15976 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15979 raise errors.OpExecError("Opcode failure was requested")
15984 class LUTestAllocator(NoHooksLU):
15985 """Run allocator tests.
15987 This LU runs the allocator tests
15990 def CheckPrereq(self):
15991 """Check prerequisites.
15993 This checks the opcode parameters depending on the direction and mode of the test.
15996 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15997 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15998 for attr in ["memory", "disks", "disk_template",
15999 "os", "tags", "nics", "vcpus"]:
16000 if not hasattr(self.op, attr):
16001 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16002 attr, errors.ECODE_INVAL)
16003 iname = self.cfg.ExpandInstanceName(self.op.name)
16004 if iname is not None:
16005 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16006 iname, errors.ECODE_EXISTS)
16007 if not isinstance(self.op.nics, list):
16008 raise errors.OpPrereqError("Invalid parameter 'nics'",
16009 errors.ECODE_INVAL)
16010 if not isinstance(self.op.disks, list):
16011 raise errors.OpPrereqError("Invalid parameter 'disks'",
16012 errors.ECODE_INVAL)
16013 for row in self.op.disks:
16014 if (not isinstance(row, dict) or
16015 constants.IDISK_SIZE not in row or
16016 not isinstance(row[constants.IDISK_SIZE], int) or
16017 constants.IDISK_MODE not in row or
16018 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16019 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16020 " parameter", errors.ECODE_INVAL)
16021 if self.op.hypervisor is None:
16022 self.op.hypervisor = self.cfg.GetHypervisorType()
16023 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16024 fname = _ExpandInstanceName(self.cfg, self.op.name)
16025 self.op.name = fname
16026 self.relocate_from = \
16027 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16028 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16029 constants.IALLOCATOR_MODE_NODE_EVAC):
16030 if not self.op.instances:
16031 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16032 self.op.instances = _GetWantedInstances(self, self.op.instances)
16034 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16035 self.op.mode, errors.ECODE_INVAL)
16037 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16038 if self.op.iallocator is None:
16039 raise errors.OpPrereqError("Missing allocator name",
16040 errors.ECODE_INVAL)
16041 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16042 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16043 self.op.direction, errors.ECODE_INVAL)
16045 def Exec(self, feedback_fn):
16046 """Run the allocator test.
16049 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16050 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16051 memory=self.op.memory,
16052 disks=self.op.disks,
16053 disk_template=self.op.disk_template,
16057 vcpus=self.op.vcpus,
16058 spindle_use=self.op.spindle_use,
16059 hypervisor=self.op.hypervisor)
16060 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16061 req = iallocator.IAReqRelocate(name=self.op.name,
16062 relocate_from=list(self.relocate_from))
16063 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16064 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16065 target_groups=self.op.target_groups)
16066 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16067 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16068 evac_mode=self.op.evac_mode)
16069 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16070 disk_template = self.op.disk_template
16071 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16072 memory=self.op.memory,
16073 disks=self.op.disks,
16074 disk_template=disk_template,
16078 vcpus=self.op.vcpus,
16079 spindle_use=self.op.spindle_use,
16080 hypervisor=self.op.hypervisor)
16081 for idx in range(self.op.count)]
16082 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16084 raise errors.ProgrammerError("Uncatched mode %s in"
16085 " LUTestAllocator.Exec", self.op.mode)
16087 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16088 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16089 result = ial.in_text
16091 ial.Run(self.op.iallocator, validate=False)
16092 result = ial.out_text
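# LUTestAllocator.CheckPrereq above validates the "disks" parameter: every
# entry must be a dict carrying an integer size and a valid access mode. The
# same shape check as a standalone helper; the accepted keys and modes below
# are example values, not the constants used above.
def _ExampleValidDiskSpecs(disks, valid_modes=frozenset(["rw", "ro"])):
  """Returns True if every disk spec has an integer size and a known mode."""
  for row in disks:
    if not isinstance(row, dict):
      return False
    if not isinstance(row.get("size"), int):
      return False
    if row.get("mode") not in valid_modes:
      return False
  return True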
16096 class LUNetworkAdd(LogicalUnit):
16097 """Logical unit for creating networks.
16100 HPATH = "network-add"
16101 HTYPE = constants.HTYPE_NETWORK
16104 def BuildHooksNodes(self):
16105 """Build hooks nodes.
16108 mn = self.cfg.GetMasterNode()
16109 return ([mn], [mn])
16111 def CheckArguments(self):
16112 if self.op.mac_prefix:
16113 self.op.mac_prefix = \
16114 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16116 def ExpandNames(self):
16117 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16119 if self.op.conflicts_check:
16120 self.share_locks[locking.LEVEL_NODE] = 1
16121 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16122 self.needed_locks = {
16123 locking.LEVEL_NODE: locking.ALL_SET,
16124 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16127 self.needed_locks = {}
16129 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16131 def CheckPrereq(self):
16132 if self.op.network is None:
16133 raise errors.OpPrereqError("Network must be given",
16134 errors.ECODE_INVAL)
16136 uuid = self.cfg.LookupNetwork(self.op.network_name)
16139 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16140 self.op.network_name), errors.ECODE_EXISTS)
16142 # Check tag validity
16143 for tag in self.op.tags:
16144 objects.TaggableObject.ValidateTag(tag)
16146 def BuildHooksEnv(self):
16147 """Build hooks env.
16151 "name": self.op.network_name,
16152 "subnet": self.op.network,
16153 "gateway": self.op.gateway,
16154 "network6": self.op.network6,
16155 "gateway6": self.op.gateway6,
16156 "mac_prefix": self.op.mac_prefix,
16157 "network_type": self.op.network_type,
16158 "tags": self.op.tags,
16160 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16162 def Exec(self, feedback_fn):
16163 """Add the ip pool to the cluster.
16166 nobj = objects.Network(name=self.op.network_name,
16167 network=self.op.network,
16168 gateway=self.op.gateway,
16169 network6=self.op.network6,
16170 gateway6=self.op.gateway6,
16171 mac_prefix=self.op.mac_prefix,
16172 network_type=self.op.network_type,
16173 uuid=self.network_uuid,
16174 family=constants.IP4_VERSION)
16175 # Initialize the associated address pool
16177 pool = network.AddressPool.InitializeNetwork(nobj)
16178 except errors.AddressPoolError, e:
16179 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16181 # Check if we need to reserve the nodes and the cluster master IP
16182 # These may not be allocated to any instances in routed mode, as
16183 # they wouldn't function anyway.
16184 if self.op.conflicts_check:
16185 for node in self.cfg.GetAllNodesInfo().values():
16186 for ip in [node.primary_ip, node.secondary_ip]:
16188 if pool.Contains(ip):
16190 self.LogInfo("Reserved IP address of node '%s' (%s)",
16192 except errors.AddressPoolError:
16193 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16196 master_ip = self.cfg.GetClusterInfo().master_ip
16198 if pool.Contains(master_ip):
16199 pool.Reserve(master_ip)
16200 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16201 except errors.AddressPoolError:
16202 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16205 if self.op.add_reserved_ips:
16206 for ip in self.op.add_reserved_ips:
16208 pool.Reserve(ip, external=True)
16209 except errors.AddressPoolError, e:
16210 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16213 for tag in self.op.tags:
16216 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16217 del self.remove_locks[locking.LEVEL_NETWORK]
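# LUNetworkAdd initializes an address pool for the new network and reserves
# the node, master and explicitly listed IP addresses in it. A toy pool with
# the same Contains/Reserve idea, backed by plain sets rather than the real
# network.AddressPool implementation:
class _ExampleAddressPool(object):
  """Minimal illustrative pool over an explicit list of addresses."""
  def __init__(self, addresses):
    self._all = set(addresses)
    self._reserved = set()

  def Contains(self, address):
    return address in self._all

  def Reserve(self, address):
    if address not in self._all or address in self._reserved:
      raise ValueError("Cannot reserve address %s" % address)
    self._reserved.add(address)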
16220 class LUNetworkRemove(LogicalUnit):
16221 HPATH = "network-remove"
16222 HTYPE = constants.HTYPE_NETWORK
16225 def ExpandNames(self):
16226 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16228 if not self.network_uuid:
16229 raise errors.OpPrereqError(("Network '%s' not found" %
16230 self.op.network_name), errors.ECODE_NOENT)
16232 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16233 self.needed_locks = {
16234 locking.LEVEL_NETWORK: [self.network_uuid],
16235 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16238 def CheckPrereq(self):
16239 """Check prerequisites.
16241 This checks that the given network exists and that it is not connected
16242 to (i.e., still in use by) any node group.
16246 # Verify that the network is not connected.
16247 node_groups = [group.name
16248 for group in self.cfg.GetAllNodeGroupsInfo().values()
16249 if self.network_uuid in group.networks]
16252 self.LogWarning("Network '%s' is connected to the following"
16253 " node groups: %s" %
16254 (self.op.network_name,
16255 utils.CommaJoin(utils.NiceSort(node_groups))))
16256 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16258 def BuildHooksEnv(self):
16259 """Build hooks env.
16263 "NETWORK_NAME": self.op.network_name,
16266 def BuildHooksNodes(self):
16267 """Build hooks nodes.
16270 mn = self.cfg.GetMasterNode()
16271 return ([mn], [mn])
16273 def Exec(self, feedback_fn):
16274 """Remove the network.
16278 self.cfg.RemoveNetwork(self.network_uuid)
16279 except errors.ConfigurationError:
16280 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16281 (self.op.network_name, self.network_uuid))
class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
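# Illustrative sketch, not part of the LU flow: how a client-side opcode for
# this LU might look. The field names mirror the self.op attributes read
# above; the concrete values (and the submission path) are assumptions for
# illustration only.
#
#   op = opcodes.OpNetworkSetParams(network_name="net1",
#                                   gateway="192.0.2.1",
#                                   add_reserved_ips=["192.0.2.10"],
#                                   remove_reserved_ips=["192.0.2.20"])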
class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
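# Illustrative example of the mapping returned by _GetStats above (the counts
# and the map string are made up; the pool map is assumed to mark reserved
# addresses with "X" and free ones with "."):
#
#   {
#     "free_count": 247,
#     "reserved_count": 9,
#     "map": "XXX........X...",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.255",
#   }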
class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
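# Illustrative sketch (field values are assumptions): the opcode driving this
# LU carries the names, output fields and locking flag consumed in
# CheckArguments above, e.g.:
#
#   op = opcodes.OpNetworkQuery(names=["net1"],
#                               output_fields=["name", "free_count"],
#                               use_locking=False)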
class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
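# Illustrative sketch (the UUID and link values are assumptions): after a
# successful LUNetworkConnect.Exec, the node group carries the per-group NIC
# parameters for the connected network, keyed by its UUID:
#
#   group.networks = {
#     "9f1fc87e-0000-0000-0000-000000000000": {
#       constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#       constants.NIC_LINK: "br0",
#       },
#     }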
def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found; remove/modify"
                               " the corresponding network interfaces",
                               errors.ECODE_STATE)
def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
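# Worked example for the formatter above, using the (nic_index, ip) pairs
# produced by _NetworkConflictCheck (the addresses are made up):
#
#   _FmtNetworkConflict([(0, "192.0.2.10"), (2, "192.0.2.15")])
#   # -> "nic0/192.0.2.10, nic2/192.0.2.15"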
class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
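# Illustrative sketch of how the dispatch table above is used; the calling
# code here is an assumption, but the constructor arguments follow the same
# pattern as LUNetworkQuery.CheckArguments:
#
#   impl = _GetQueryImplementation(constants.QR_NETWORK)   # -> _NetworkQuery
#   qry = impl(qlang.MakeSimpleFilter("name", ["net1"]), ["name"], False)
#   # an unknown resource name raises OpPrereqError with errors.ECODE_INVAL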
def _CheckForConflictingIp(lu, ip, node):
  """In case of a conflicting IP address, raise an error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
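# Illustrative sketch (node name and address are made up): callers in this
# module invoke the helper above while holding the relevant locks, e.g.:
#
#   _CheckForConflictingIp(self, "192.0.2.14", "node1.example.com")
#   # returns (None, None) if the address does not conflict in the node's
#   # group, and raises OpPrereqError(..., errors.ECODE_STATE) otherwise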