# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
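
# A minimal usage sketch (hypothetical LU and helper, for illustration
# only): an Exec implementation can hand follow-up work to the job queue
# by returning a ResultWithJobs; mcpu._ProcessResult then submits the jobs:
#
#   def Exec(self, feedback_fn):
#     ops = [opcodes.OpInstanceStartup(instance_name=name)
#            for name in self._StoppedInstances()]
#     return ResultWithJobs([[op] for op in ops])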


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
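
  # A usage sketch (hypothetical LU, names illustrative): an instance LU
  # typically requests node locks lazily in ExpandNames and resolves them
  # here once the instance lock is held:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()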


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
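
# Typical use (sketch): read-only LUs can declare every lock level shared
# in their ExpandNames, e.g.:
#
#   self.share_locks = _ShareAll()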


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
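
# A brief illustration with hypothetical values: with use_default=True, a
# value of constants.VALUE_DEFAULT removes the key so the default applies
# again, while any other value overrides the old one:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd)
#   => {"root_path": "/dev/vda1"}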


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
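
# A usage sketch (hypothetical lock names): once an LU has narrowed down
# the nodes it actually works on, it can drop the remaining node locks:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release an explicit set instead:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)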


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
    value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
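
# Result shape, with made-up node and LV names for illustration:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}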


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
  """Make sure that none of the given parameters is global.

  If a global parameter is found, an L{errors.OpPrereqError} exception is
  raised. This is used to avoid setting global parameters for individual nodes.

  @type params: dictionary
  @param params: Parameters to check
  @type glob_pars: dictionary
  @param glob_pars: Forbidden parameters
  @type kind: string
  @param kind: Kind of parameters (e.g. "node")
  @type bad_levels: string
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
    "instance")
  @type good_levels: strings
  @param good_levels: Level(s) at which the parameters are allowed (e.g.
    "cluster or group")

  """
  used_globals = glob_pars.intersection(params)
  if used_globals:
    msg = ("The following %s parameters are global and cannot"
           " be customized at %s level, please modify them at"
           " %s level: %s" %
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
    not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
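
# A brief illustration with hypothetical policy values: for an ipolicy
# where ISPECS_MIN["memory-size"] is 128 and ISPECS_MAX["memory-size"] is
# 32768,
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 64)
#
# returns "memory-size value 64 is not in range [128, 32768]", while a
# value of 256 would yield None.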


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
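
# For illustration (hypothetical numbers): an instance with 4096 MB of
# memory, 2 VCPUs, one 10240 MB disk, one NIC and spindle_use=1 would be
# checked via
#
#   _ComputeIPolicySpecViolation(ipolicy, 4096, 2, 1, 1, [10240], 1)
#
# and the result is an empty list when every value fits the policy.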


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
    did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
      elif net:
        # FIXME: broken network reference: the instance NIC specifies a
        # network, but the relevant network entry was not in the config. This
        # should be made impossible.
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if tags is None:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
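
# Illustration (hypothetical names): a request for "node1" resolving to
# "node1.example.com" passes the prefix check, while "node1" resolving to
# "node2.example.com" raises OpPrereqError.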


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
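# Illustrative note (sketch only): the (errcode, msg) pairs returned above
# plug straight into _VerifyErrors-style reporting, e.g.
#   (errcode, msg) = _VerifyCertificate(pathutils.NODED_CERT_FILE)
# where errcode is None, LUClusterVerifyConfig.ETYPE_WARNING or
# LUClusterVerifyConfig.ETYPE_ERROR, and msg names the file it refers to.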
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
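# Illustrative sketch of the accumulated list (hypothetical names): the
# function returns hvp_data looking like
#   [("cluster", "xen-pvm", <cluster defaults>),
#    ("os debian-image", "xen-pvm", <defaults plus OS overrides>),
#    ("instance inst1.example.com", "kvm", <fully filled parameters>)]
# which _VerifyHVP below walks to syntax-check every parameter source.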
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # If the error code is in the list of ignored errors, demote the error to a warning
2032 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2033 ltype = self.ETYPE_WARNING
2034 # first complete the msg
2037 # then format the whole message
2038 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2039 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2045 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2046 # and finally report it via the feedback_fn
2047 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2048 # warnings alone do not mark the operation as failed
2049 if ltype == self.ETYPE_ERROR:
2052 def _ErrorIf(self, cond, *args, **kwargs):
2053 """Log an error message if the passed condition is True.
2057 or self.op.debug_simulate_errors): # pylint: disable=E1101
2058 self._Error(*args, **kwargs)
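# A minimal illustrative sketch (hypothetical helper, not part of the LU
# code): the machine-parseable line _VerifyErrors._Error emits when the
# opcode sets error_codes.
def _ExampleVerifyErrorLine(ltype, etxt, itype, item, msg):
  """Sketch only: mirrors the "%s:%s:%s:%s:%s" formatting above.

  E.g. _ExampleVerifyErrorLine("ERROR", "ECLUSTERCFG", "cluster", "", "bad")
  returns "ERROR:ECLUSTERCFG:cluster::bad".

  """
  return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)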
2061 class LUClusterVerify(NoHooksLU):
2062 """Submits all jobs necessary to verify the cluster.
2067 def ExpandNames(self):
2068 self.needed_locks = {}
2070 def Exec(self, feedback_fn):
2073 if self.op.group_name:
2074 groups = [self.op.group_name]
2075 depends_fn = lambda: None
2077 groups = self.cfg.GetNodeGroupList()
2079 # Verify global configuration
2081 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2084 # Always depend on global verification
2085 depends_fn = lambda: [(-len(jobs), [])]
2088 [opcodes.OpClusterVerifyGroup(group_name=group,
2089 ignore_errors=self.op.ignore_errors,
2090 depends=depends_fn())]
2091 for group in groups)
2093 # Fix up all parameters
2094 for op in itertools.chain(*jobs): # pylint: disable=W0142
2095 op.debug_simulate_errors = self.op.debug_simulate_errors
2096 op.verbose = self.op.verbose
2097 op.error_codes = self.op.error_codes
2099 op.skip_checks = self.op.skip_checks
2100 except AttributeError:
2101 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2103 return ResultWithJobs(jobs)
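# Illustrative note on the dependency values built above: job dependencies
# are expressed with relative job IDs, so with a single config-verify job
# submitted first, each group-verify job carries depends=[(-1, [])], i.e.
# "wait for the job submitted one position earlier in this batch".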
2106 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2107 """Verifies the cluster config.
2112 def _VerifyHVP(self, hvp_data):
2113 """Verifies locally the syntax of the hypervisor parameters.
2116 for item, hv_name, hv_params in hvp_data:
2117 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2120 hv_class = hypervisor.GetHypervisorClass(hv_name)
2121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2122 hv_class.CheckParameterSyntax(hv_params)
2123 except errors.GenericError, err:
2124 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2126 def ExpandNames(self):
2127 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2128 self.share_locks = _ShareAll()
2130 def CheckPrereq(self):
2131 """Check prerequisites.
2134 # Retrieve all information
2135 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2136 self.all_node_info = self.cfg.GetAllNodesInfo()
2137 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2139 def Exec(self, feedback_fn):
2140 """Verify integrity of cluster, performing various test on nodes.
2144 self._feedback_fn = feedback_fn
2146 feedback_fn("* Verifying cluster config")
2148 for msg in self.cfg.VerifyConfig():
2149 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2151 feedback_fn("* Verifying cluster certificate files")
2153 for cert_filename in pathutils.ALL_CERT_FILES:
2154 (errcode, msg) = _VerifyCertificate(cert_filename)
2155 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2157 feedback_fn("* Verifying hypervisor parameters")
2159 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2160 self.all_inst_info.values()))
2162 feedback_fn("* Verifying all nodes belong to an existing group")
2164 # We do this verification here because, should this bogus circumstance
2165 # occur, it would never be caught by VerifyGroup, which only acts on
2166 # nodes/instances reachable from existing node groups.
2168 dangling_nodes = set(node.name for node in self.all_node_info.values()
2169 if node.group not in self.all_group_info)
2171 dangling_instances = {}
2172 no_node_instances = []
2174 for inst in self.all_inst_info.values():
2175 if inst.primary_node in dangling_nodes:
2176 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2177 elif inst.primary_node not in self.all_node_info:
2178 no_node_instances.append(inst.name)
2183 utils.CommaJoin(dangling_instances.get(node.name,
2185 for node in dangling_nodes]
2187 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2189 "the following nodes (and their instances) belong to a non"
2190 " existing group: %s", utils.CommaJoin(pretty_dangling))
2192 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2194 "the following instances have a non-existing primary-node:"
2195 " %s", utils.CommaJoin(no_node_instances))
2200 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2201 """Verifies the status of a node group.
2204 HPATH = "cluster-verify"
2205 HTYPE = constants.HTYPE_CLUSTER
2208 _HOOKS_INDENT_RE = re.compile("^", re.M)
2210 class NodeImage(object):
2211 """A class representing the logical and physical status of a node.
2214 @ivar name: the node name to which this object refers
2215 @ivar volumes: a structure as returned from
2216 L{ganeti.backend.GetVolumeList} (runtime)
2217 @ivar instances: a list of running instances (runtime)
2218 @ivar pinst: list of configured primary instances (config)
2219 @ivar sinst: list of configured secondary instances (config)
2220 @ivar sbp: dictionary of {primary-node: list of instances} for all
2221 instances for which this node is secondary (config)
2222 @ivar mfree: free memory, as reported by hypervisor (runtime)
2223 @ivar dfree: free disk, as reported by the node (runtime)
2224 @ivar offline: the offline status (config)
2225 @type rpc_fail: boolean
2226 @ivar rpc_fail: whether the RPC verify call failed (overall,
2227 not whether the individual keys were correct) (runtime)
2228 @type lvm_fail: boolean
2229 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2230 @type hyp_fail: boolean
2231 @ivar hyp_fail: whether the RPC call didn't return the instance list
2232 @type ghost: boolean
2233 @ivar ghost: whether this is a known node or not (config)
2234 @type os_fail: boolean
2235 @ivar os_fail: whether the RPC call didn't return valid OS data
2237 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2238 @type vm_capable: boolean
2239 @ivar vm_capable: whether the node can host instances
2241 @ivar pv_min: size in MiB of the smallest PVs
2243 @ivar pv_max: size in MiB of the biggest PVs
2246 def __init__(self, offline=False, name=None, vm_capable=True):
2255 self.offline = offline
2256 self.vm_capable = vm_capable
2257 self.rpc_fail = False
2258 self.lvm_fail = False
2259 self.hyp_fail = False
2261 self.os_fail = False
2266 def ExpandNames(self):
2267 # This raises errors.OpPrereqError on its own:
2268 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2270 # Get instances in node group; this is unsafe and needs verification later
2272 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2274 self.needed_locks = {
2275 locking.LEVEL_INSTANCE: inst_names,
2276 locking.LEVEL_NODEGROUP: [self.group_uuid],
2277 locking.LEVEL_NODE: [],
2279 # This opcode is run by the watcher every five minutes and acquires all nodes
2280 # for a group. It doesn't run for a long time, so it's better to acquire
2281 # the node allocation lock as well.
2282 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2285 self.share_locks = _ShareAll()
2287 def DeclareLocks(self, level):
2288 if level == locking.LEVEL_NODE:
2289 # Get members of node group; this is unsafe and needs verification later
2290 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2292 all_inst_info = self.cfg.GetAllInstancesInfo()
2294 # In Exec(), we warn about mirrored instances that have primary and
2295 # secondary living in separate node groups. To fully verify that
2296 # volumes for these instances are healthy, we will need to do an
2297 # extra call to their secondaries. We ensure here those nodes will be locked.
2299 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2300 # Important: access only the instances whose lock is owned
2301 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2302 nodes.update(all_inst_info[inst].secondary_nodes)
2304 self.needed_locks[locking.LEVEL_NODE] = nodes
2306 def CheckPrereq(self):
2307 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2308 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2310 group_nodes = set(self.group_info.members)
2312 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2315 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2317 unlocked_instances = \
2318 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2321 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2322 utils.CommaJoin(unlocked_nodes),
2325 if unlocked_instances:
2326 raise errors.OpPrereqError("Missing lock for instances: %s" %
2327 utils.CommaJoin(unlocked_instances),
2330 self.all_node_info = self.cfg.GetAllNodesInfo()
2331 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2333 self.my_node_names = utils.NiceSort(group_nodes)
2334 self.my_inst_names = utils.NiceSort(group_instances)
2336 self.my_node_info = dict((name, self.all_node_info[name])
2337 for name in self.my_node_names)
2339 self.my_inst_info = dict((name, self.all_inst_info[name])
2340 for name in self.my_inst_names)
2342 # We detect here the nodes that will need the extra RPC calls for verifying
2343 # split LV volumes; they should be locked.
2344 extra_lv_nodes = set()
2346 for inst in self.my_inst_info.values():
2347 if inst.disk_template in constants.DTS_INT_MIRROR:
2348 for nname in inst.all_nodes:
2349 if self.all_node_info[nname].group != self.group_uuid:
2350 extra_lv_nodes.add(nname)
2352 unlocked_lv_nodes = \
2353 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2355 if unlocked_lv_nodes:
2356 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2357 utils.CommaJoin(unlocked_lv_nodes),
2359 self.extra_lv_nodes = list(extra_lv_nodes)
2361 def _VerifyNode(self, ninfo, nresult):
2362 """Perform some basic validation on data returned from a node.
2364 - check the result data structure is well formed and has all the mandatory fields
2366 - check ganeti version
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the results from the node
2372 @return: whether overall this call was successful (and we can expect
2373 reasonable values in the response)
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 # main result, nresult should be a non-empty dict
2380 test = not nresult or not isinstance(nresult, dict)
2381 _ErrorIf(test, constants.CV_ENODERPC, node,
2382 "unable to verify node: no data returned")
2386 # compares ganeti version
2387 local_version = constants.PROTOCOL_VERSION
2388 remote_version = nresult.get("version", None)
2389 test = not (remote_version and
2390 isinstance(remote_version, (list, tuple)) and
2391 len(remote_version) == 2)
2392 _ErrorIf(test, constants.CV_ENODERPC, node,
2393 "connection to node returned invalid data")
2397 test = local_version != remote_version[0]
2398 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2399 "incompatible protocol versions: master %s,"
2400 " node %s", local_version, remote_version[0])
2404 # node seems compatible, we can actually try to look into its results
2406 # full package version
2407 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2408 constants.CV_ENODEVERSION, node,
2409 "software version mismatch: master %s, node %s",
2410 constants.RELEASE_VERSION, remote_version[1],
2411 code=self.ETYPE_WARNING)
2413 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2414 if ninfo.vm_capable and isinstance(hyp_result, dict):
2415 for hv_name, hv_result in hyp_result.iteritems():
2416 test = hv_result is not None
2417 _ErrorIf(test, constants.CV_ENODEHV, node,
2418 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2420 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2421 if ninfo.vm_capable and isinstance(hvp_result, list):
2422 for item, hv_name, hv_result in hvp_result:
2423 _ErrorIf(True, constants.CV_ENODEHV, node,
2424 "hypervisor %s parameter verify failure (source %s): %s",
2425 hv_name, item, hv_result)
2427 test = nresult.get(constants.NV_NODESETUP,
2428 ["Missing NODESETUP results"])
2429 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2434 def _VerifyNodeTime(self, ninfo, nresult,
2435 nvinfo_starttime, nvinfo_endtime):
2436 """Check the node time.
2438 @type ninfo: L{objects.Node}
2439 @param ninfo: the node to check
2440 @param nresult: the remote results for the node
2441 @param nvinfo_starttime: the start time of the RPC call
2442 @param nvinfo_endtime: the end time of the RPC call
2446 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2448 ntime = nresult.get(constants.NV_TIME, None)
2450 ntime_merged = utils.MergeTime(ntime)
2451 except (ValueError, TypeError):
2452 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2455 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2456 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2457 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2458 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2462 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2463 "Node time diverges by at least %s from master node time",
2466 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2467 """Check the node LVM results and update info for cross-node checks.
2469 @type ninfo: L{objects.Node}
2470 @param ninfo: the node to check
2471 @param nresult: the remote results for the node
2472 @param vg_name: the configured VG name
2473 @type nimg: L{NodeImage}
2474 @param nimg: node image
2481 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2483 # checks vg existence and size > 20G
2484 vglist = nresult.get(constants.NV_VGLIST, None)
2486 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2488 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2489 constants.MIN_VG_SIZE)
2490 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2493 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2495 self._Error(constants.CV_ENODELVM, node, em)
2496 if pvminmax is not None:
2497 (nimg.pv_min, nimg.pv_max) = pvminmax
2499 def _VerifyGroupLVM(self, node_image, vg_name):
2500 """Check cross-node consistency in LVM.
2502 @type node_image: dict
2503 @param node_image: info about nodes, mapping from node to names to
2504 L{NodeImage} objects
2505 @param vg_name: the configured VG name
2511 # Only exclusive storage needs this kind of check
2512 if not self._exclusive_storage:
2515 # exclusive_storage wants all PVs to have the same size (approximately);
2516 # if the smallest and the biggest ones are okay, everything is fine.
2517 # pv_min is None iff pv_max is None
2518 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2521 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2522 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2523 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2524 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2525 "PV sizes differ too much in the group; smallest (%s MB) is"
2526 " on %s, biggest (%s MB) is on %s",
2527 pvmin, minnode, pvmax, maxnode)
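# Illustrative worked example (hypothetical sizes): with node A reporting
# pv_min=1024/pv_max=1024 MiB and node B reporting pv_min=1024/pv_max=4096
# MiB, the reductions above yield (pvmin, minnode) == (1024, "A") and
# (pvmax, maxnode) == (4096, "B"); LvmExclusiveTestBadPvSizes then decides
# whether a 1024..4096 spread is too wide for exclusive storage.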
2529 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2530 """Check the node bridges.
2532 @type ninfo: L{objects.Node}
2533 @param ninfo: the node to check
2534 @param nresult: the remote results for the node
2535 @param bridges: the expected list of bridges
2542 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 missing = nresult.get(constants.NV_BRIDGES, None)
2545 test = not isinstance(missing, list)
2546 _ErrorIf(test, constants.CV_ENODENET, node,
2547 "did not return valid bridge information")
2549 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2550 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2552 def _VerifyNodeUserScripts(self, ninfo, nresult):
2553 """Check the results of user scripts presence and executability on the node
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2562 test = not constants.NV_USERSCRIPTS in nresult
2563 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2564 "did not return user scripts information")
2566 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2568 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2569 "user scripts not present or not executable: %s" %
2570 utils.CommaJoin(sorted(broken_scripts)))
2572 def _VerifyNodeNetwork(self, ninfo, nresult):
2573 """Check the node network connectivity results.
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2581 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2583 test = constants.NV_NODELIST not in nresult
2584 _ErrorIf(test, constants.CV_ENODESSH, node,
2585 "node hasn't returned node ssh connectivity data")
2587 if nresult[constants.NV_NODELIST]:
2588 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2589 _ErrorIf(True, constants.CV_ENODESSH, node,
2590 "ssh communication with node '%s': %s", a_node, a_msg)
2592 test = constants.NV_NODENETTEST not in nresult
2593 _ErrorIf(test, constants.CV_ENODENET, node,
2594 "node hasn't returned node tcp connectivity data")
2596 if nresult[constants.NV_NODENETTEST]:
2597 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2599 _ErrorIf(True, constants.CV_ENODENET, node,
2600 "tcp communication with node '%s': %s",
2601 anode, nresult[constants.NV_NODENETTEST][anode])
2603 test = constants.NV_MASTERIP not in nresult
2604 _ErrorIf(test, constants.CV_ENODENET, node,
2605 "node hasn't returned node master IP reachability data")
2607 if not nresult[constants.NV_MASTERIP]:
2608 if node == self.master_node:
2609 msg = "the master node cannot reach the master IP (not configured?)"
2611 msg = "cannot reach the master IP"
2612 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2614 def _VerifyInstance(self, instance, inst_config, node_image,
2616 """Verify an instance.
2618 This function checks to see if the required block devices are
2619 available on the instance's node, and that the nodes are in the correct state.
2623 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 pnode = inst_config.primary_node
2625 pnode_img = node_image[pnode]
2626 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628 node_vol_should = {}
2629 inst_config.MapLVsByNode(node_vol_should)
2631 cluster = self.cfg.GetClusterInfo()
2632 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2634 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2635 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2636 code=self.ETYPE_WARNING)
2638 for node in node_vol_should:
2639 n_img = node_image[node]
2640 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2641 # ignore missing volumes on offline or broken nodes
2643 for volume in node_vol_should[node]:
2644 test = volume not in n_img.volumes
2645 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2646 "volume %s missing on node %s", volume, node)
2648 if inst_config.admin_state == constants.ADMINST_UP:
2649 test = instance not in pnode_img.instances and not pnode_img.offline
2650 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2651 "instance not running on its primary node %s",
2653 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2654 "instance is marked as running and lives on offline node %s",
2657 diskdata = [(nname, success, status, idx)
2658 for (nname, disks) in diskstatus.items()
2659 for idx, (success, status) in enumerate(disks)]
2661 for nname, success, bdev_status, idx in diskdata:
2662 # the 'ghost node' construction in Exec() ensures that node_image has an entry for every node name used here
2664 snode = node_image[nname]
2665 bad_snode = snode.ghost or snode.offline
2666 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2667 not success and not bad_snode,
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "couldn't retrieve status for disk/%s on %s: %s",
2670 idx, nname, bdev_status)
2671 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2672 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "disk/%s on %s is faulty", idx, nname)
2676 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2677 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2678 " primary node failed", instance)
2680 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2681 constants.CV_EINSTANCELAYOUT,
2682 instance, "instance has multiple secondary nodes: %s",
2683 utils.CommaJoin(inst_config.secondary_nodes),
2684 code=self.ETYPE_WARNING)
2686 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2687 # Disk template not compatible with exclusive_storage: no instance
2688 # node should have the flag set
2689 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2690 inst_config.all_nodes)
2691 es_nodes = [n for (n, es) in es_flags.items()
2693 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2694 "instance has template %s, which is not supported on nodes"
2695 " that have exclusive storage set: %s",
2696 inst_config.disk_template, utils.CommaJoin(es_nodes))
2698 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2699 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2700 instance_groups = {}
2702 for node in instance_nodes:
2703 instance_groups.setdefault(self.all_node_info[node].group,
2707 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2708 # Sort so that we always list the primary node first.
2709 for group, nodes in sorted(instance_groups.items(),
2710 key=lambda (_, nodes): pnode in nodes,
2713 self._ErrorIf(len(instance_groups) > 1,
2714 constants.CV_EINSTANCESPLITGROUPS,
2715 instance, "instance has primary and secondary nodes in"
2716 " different groups: %s", utils.CommaJoin(pretty_list),
2717 code=self.ETYPE_WARNING)
2719 inst_nodes_offline = []
2720 for snode in inst_config.secondary_nodes:
2721 s_img = node_image[snode]
2722 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2723 snode, "instance %s, connection to secondary node failed",
2727 inst_nodes_offline.append(snode)
2729 # warn that the instance lives on offline nodes
2730 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2731 "instance has offline secondary node(s) %s",
2732 utils.CommaJoin(inst_nodes_offline))
2733 # ... or ghost/non-vm_capable nodes
2734 for node in inst_config.all_nodes:
2735 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2736 instance, "instance lives on ghost node %s", node)
2737 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2738 instance, "instance lives on non-vm_capable node %s", node)
2740 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2741 """Verify if there are any unknown volumes in the cluster.
2743 The .os, .swap and backup volumes are ignored. All other volumes are
2744 reported as unknown.
2746 @type reserved: L{ganeti.utils.FieldSet}
2747 @param reserved: a FieldSet of reserved volume names
2750 for node, n_img in node_image.items():
2751 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2752 self.all_node_info[node].group != self.group_uuid):
2753 # skip non-healthy nodes
2755 for volume in n_img.volumes:
2756 test = ((node not in node_vol_should or
2757 volume not in node_vol_should[node]) and
2758 not reserved.Matches(volume))
2759 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2760 "volume %s is unknown", volume)
2762 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2763 """Verify N+1 Memory Resilience.
2765 Check that if one single node dies we can still start all the
2766 instances it was primary for.
2769 cluster_info = self.cfg.GetClusterInfo()
2770 for node, n_img in node_image.items():
2771 # This code checks that every node which is now listed as
2772 # secondary has enough memory to host all instances it is
2773 # supposed to should a single other node in the cluster fail.
2774 # FIXME: not ready for failover to an arbitrary node
2775 # FIXME: does not support file-backed instances
2776 # WARNING: we currently take into account down instances as well
2777 # as up ones, considering that even if they're down someone
2778 # might want to start them even in the event of a node failure.
2779 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2780 # we're skipping nodes marked offline and nodes in other groups from
2781 # the N+1 warning, since most likely we don't have good memory
2782 # information from them; we already list instances living on such
2783 # nodes, and that's enough warning
2785 #TODO(dynmem): also consider ballooning out other instances
2786 for prinode, instances in n_img.sbp.items():
2788 for instance in instances:
2789 bep = cluster_info.FillBE(instance_cfg[instance])
2790 if bep[constants.BE_AUTO_BALANCE]:
2791 needed_mem += bep[constants.BE_MINMEM]
2792 test = n_img.mfree < needed_mem
2793 self._ErrorIf(test, constants.CV_ENODEN1, node,
2794 "not enough memory to accomodate instance failovers"
2795 " should node %s fail (%dMiB needed, %dMiB available)",
2796 prinode, needed_mem, n_img.mfree)
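# Illustrative worked example (hypothetical numbers): if this node is
# secondary for two auto-balanced instances of primary P with BE_MINMEM of
# 2048 and 1024 MiB, needed_mem == 3072; a reported mfree of 2560 MiB would
# then raise CV_ENODEN1 for a failover of P onto this node.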
2799 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2800 (files_all, files_opt, files_mc, files_vm)):
2801 """Verifies file checksums collected from all nodes.
2803 @param errorif: Callback for reporting errors
2804 @param nodeinfo: List of L{objects.Node} objects
2805 @param master_node: Name of master node
2806 @param all_nvinfo: RPC results
2809 # Define functions determining which nodes to consider for a file
2812 (files_mc, lambda node: (node.master_candidate or
2813 node.name == master_node)),
2814 (files_vm, lambda node: node.vm_capable),
2817 # Build mapping from filename to list of nodes which should have the file
2819 for (files, fn) in files2nodefn:
2821 filenodes = nodeinfo
2823 filenodes = filter(fn, nodeinfo)
2824 nodefiles.update((filename,
2825 frozenset(map(operator.attrgetter("name"), filenodes)))
2826 for filename in files)
2828 assert set(nodefiles) == (files_all | files_mc | files_vm)
2830 fileinfo = dict((filename, {}) for filename in nodefiles)
2831 ignore_nodes = set()
2833 for node in nodeinfo:
2835 ignore_nodes.add(node.name)
2838 nresult = all_nvinfo[node.name]
2840 if nresult.fail_msg or not nresult.payload:
2843 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2844 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2845 for (key, value) in fingerprints.items())
2848 test = not (node_files and isinstance(node_files, dict))
2849 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2850 "Node did not return file checksum data")
2852 ignore_nodes.add(node.name)
2855 # Build per-checksum mapping from filename to nodes having it
2856 for (filename, checksum) in node_files.items():
2857 assert filename in nodefiles
2858 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2860 for (filename, checksums) in fileinfo.items():
2861 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2863 # Nodes having the file
2864 with_file = frozenset(node_name
2865 for nodes in fileinfo[filename].values()
2866 for node_name in nodes) - ignore_nodes
2868 expected_nodes = nodefiles[filename] - ignore_nodes
2870 # Nodes missing file
2871 missing_file = expected_nodes - with_file
2873 if filename in files_opt:
2875 errorif(missing_file and missing_file != expected_nodes,
2876 constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is optional, but it must exist on all or no"
2878 " nodes (not found on %s)",
2879 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2881 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is missing from node(s) %s", filename,
2883 utils.CommaJoin(utils.NiceSort(missing_file)))
2885 # Warn if a node has a file it shouldn't
2886 unexpected = with_file - expected_nodes
2888 constants.CV_ECLUSTERFILECHECK, None,
2889 "File %s should not exist on node(s) %s",
2890 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2892 # See if there are multiple versions of the file
2893 test = len(checksums) > 1
2895 variants = ["variant %s on %s" %
2896 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2897 for (idx, (checksum, nodes)) in
2898 enumerate(sorted(checksums.items()))]
2902 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2903 "File %s found with %s different checksums (%s)",
2904 filename, len(checksums), "; ".join(variants))
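# Illustrative sketch (hypothetical checksums): a fileinfo entry such as
#   {"abc123...": set(["node1", "node2"]), "def456...": set(["node3"])}
# produces "File X found with 2 different checksums (variant 1 on node1,
# node2; variant 2 on node3)".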
2906 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2908 """Verifies and the node DRBD status.
2910 @type ninfo: L{objects.Node}
2911 @param ninfo: the node to check
2912 @param nresult: the remote results for the node
2913 @param instanceinfo: the dict of instances
2914 @param drbd_helper: the configured DRBD usermode helper
2915 @param drbd_map: the DRBD map as returned by
2916 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2920 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2923 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2924 test = (helper_result is None)
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "no drbd usermode helper returned")
2928 status, payload = helper_result
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "drbd usermode helper check unsuccessful: %s", payload)
2932 test = status and (payload != drbd_helper)
2933 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2934 "wrong drbd usermode helper: %s", payload)
2936 # compute the DRBD minors
2938 for minor, instance in drbd_map[node].items():
2939 test = instance not in instanceinfo
2940 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2941 "ghost instance '%s' in temporary DRBD map", instance)
2942 # ghost instance should not be running, but otherwise we
2943 # don't give double warnings (both ghost instance and
2944 # unallocated minor in use)
2946 node_drbd[minor] = (instance, False)
2948 instance = instanceinfo[instance]
2949 node_drbd[minor] = (instance.name,
2950 instance.admin_state == constants.ADMINST_UP)
2952 # and now check them
2953 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2954 test = not isinstance(used_minors, (tuple, list))
2955 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2956 "cannot parse drbd status file: %s", str(used_minors))
2958 # we cannot check drbd status
2961 for minor, (iname, must_exist) in node_drbd.items():
2962 test = minor not in used_minors and must_exist
2963 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964 "drbd minor %d of instance %s is not active", minor, iname)
2965 for minor in used_minors:
2966 test = minor not in node_drbd
2967 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2968 "unallocated drbd minor %d is in use", minor)
2970 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2971 """Builds the node OS structures.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2982 remote_os = nresult.get(constants.NV_OSLIST, None)
2983 test = (not isinstance(remote_os, list) or
2984 not compat.all(isinstance(v, list) and len(v) == 7
2985 for v in remote_os))
2987 _ErrorIf(test, constants.CV_ENODEOS, node,
2988 "node hasn't returned valid OS data")
2997 for (name, os_path, status, diagnose,
2998 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3000 if name not in os_dict:
3003 # parameters is a list of lists instead of list of tuples due to
3004 # JSON lacking a real tuple type, fix it:
3005 parameters = [tuple(v) for v in parameters]
3006 os_dict[name].append((os_path, status, diagnose,
3007 set(variants), set(parameters), set(api_ver)))
3009 nimg.oslist = os_dict
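# Illustrative sketch of the structure built above (hypothetical OS): e.g.
#   nimg.oslist == {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                    set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples, one per directory providing that OS; more than one
# entry means shadowed duplicates, which _VerifyNodeOS below reports.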
3011 def _VerifyNodeOS(self, ninfo, nimg, base):
3012 """Verifies the node OS list.
3014 @type ninfo: L{objects.Node}
3015 @param ninfo: the node to check
3016 @param nimg: the node image object
3017 @param base: the 'template' node we match against (e.g. from the master)
3021 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3023 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3025 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3026 for os_name, os_data in nimg.oslist.items():
3027 assert os_data, "Empty OS status for OS %s?!" % os_name
3028 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3029 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3030 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3031 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3032 "OS '%s' has multiple entries (first one shadows the rest): %s",
3033 os_name, utils.CommaJoin([v[0] for v in os_data]))
3034 # comparisons with the 'base' image
3035 test = os_name not in base.oslist
3036 _ErrorIf(test, constants.CV_ENODEOS, node,
3037 "Extra OS %s not present on reference node (%s)",
3041 assert base.oslist[os_name], "Base node has empty OS status?"
3042 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3044 # base OS is invalid, skipping
3046 for kind, a, b in [("API version", f_api, b_api),
3047 ("variants list", f_var, b_var),
3048 ("parameters", beautify_params(f_param),
3049 beautify_params(b_param))]:
3050 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3051 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3052 kind, os_name, base.name,
3053 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3055 # check any missing OSes
3056 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3057 _ErrorIf(missing, constants.CV_ENODEOS, node,
3058 "OSes present on reference node %s but missing on this node: %s",
3059 base.name, utils.CommaJoin(missing))
3061 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3062 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3064 @type ninfo: L{objects.Node}
3065 @param ninfo: the node to check
3066 @param nresult: the remote results for the node
3067 @type is_master: bool
3068 @param is_master: Whether node is the master node
3074 (constants.ENABLE_FILE_STORAGE or
3075 constants.ENABLE_SHARED_FILE_STORAGE)):
3077 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3079 # This should never happen
3080 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3081 "Node did not return forbidden file storage paths")
3083 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Found forbidden file storage paths: %s",
3085 utils.CommaJoin(fspaths))
3087 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3088 constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Node should not have returned forbidden file storage"
3092 def _VerifyOob(self, ninfo, nresult):
3093 """Verifies out of band functionality of a node.
3095 @type ninfo: L{objects.Node}
3096 @param ninfo: the node to check
3097 @param nresult: the remote results for the node
3101 # We just have to verify the paths on master and/or master candidates
3102 # as the oob helper is invoked on the master
3103 if ((ninfo.master_candidate or ninfo.master_capable) and
3104 constants.NV_OOB_PATHS in nresult):
3105 for path_result in nresult[constants.NV_OOB_PATHS]:
3106 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3108 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3109 """Verifies and updates the node volume data.
3111 This function will update a L{NodeImage}'s internal structures
3112 with data from the remote call.
3114 @type ninfo: L{objects.Node}
3115 @param ninfo: the node to check
3116 @param nresult: the remote results for the node
3117 @param nimg: the node image object
3118 @param vg_name: the configured VG name
3122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3124 nimg.lvm_fail = True
3125 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3128 elif isinstance(lvdata, basestring):
3129 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3130 utils.SafeEncode(lvdata))
3131 elif not isinstance(lvdata, dict):
3132 _ErrorIf(True, constants.CV_ENODELVM, node,
3133 "rpc call to node failed (lvlist)")
3135 nimg.volumes = lvdata
3136 nimg.lvm_fail = False
3138 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3139 """Verifies and updates the node instance list.
3141 If the listing was successful, then updates this node's instance
3142 list. Otherwise, it marks the RPC call as failed for the instance
3145 @type ninfo: L{objects.Node}
3146 @param ninfo: the node to check
3147 @param nresult: the remote results for the node
3148 @param nimg: the node image object
3151 idata = nresult.get(constants.NV_INSTANCELIST, None)
3152 test = not isinstance(idata, list)
3153 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3154 "rpc call to node failed (instancelist): %s",
3155 utils.SafeEncode(str(idata)))
3157 nimg.hyp_fail = True
3159 nimg.instances = idata
3161 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3162 """Verifies and computes a node information map
3164 @type ninfo: L{objects.Node}
3165 @param ninfo: the node to check
3166 @param nresult: the remote results for the node
3167 @param nimg: the node image object
3168 @param vg_name: the configured VG name
3172 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3174 # try to read free memory (from the hypervisor)
3175 hv_info = nresult.get(constants.NV_HVINFO, None)
3176 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3177 _ErrorIf(test, constants.CV_ENODEHV, node,
3178 "rpc call to node failed (hvinfo)")
3181 nimg.mfree = int(hv_info["memory_free"])
3182 except (ValueError, TypeError):
3183 _ErrorIf(True, constants.CV_ENODERPC, node,
3184 "node returned invalid nodeinfo, check hypervisor")
3186 # FIXME: devise a free space model for file based instances as well
3187 if vg_name is not None:
3188 test = (constants.NV_VGLIST not in nresult or
3189 vg_name not in nresult[constants.NV_VGLIST])
3190 _ErrorIf(test, constants.CV_ENODELVM, node,
3191 "node didn't return data for the volume group '%s'"
3192 " - it is either missing or broken", vg_name)
3195 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3196 except (ValueError, TypeError):
3197 _ErrorIf(True, constants.CV_ENODERPC, node,
3198 "node returned invalid LVM info, check LVM status")
3200 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3201 """Gets per-disk status information for all instances.
3203 @type nodelist: list of strings
3204 @param nodelist: Node names
3205 @type node_image: dict of (name, L{objects.Node})
3206 @param node_image: Node objects
3207 @type instanceinfo: dict of (name, L{objects.Instance})
3208 @param instanceinfo: Instance objects
3209 @rtype: {instance: {node: [(success, payload)]}}
3210 @return: a dictionary of per-instance dictionaries with nodes as
3211 keys and disk information as values; the disk information is a
3212 list of tuples (success, payload)
3215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 node_disks_devonly = {}
3219 diskless_instances = set()
3220 diskless = constants.DT_DISKLESS
3222 for nname in nodelist:
3223 node_instances = list(itertools.chain(node_image[nname].pinst,
3224 node_image[nname].sinst))
3225 diskless_instances.update(inst for inst in node_instances
3226 if instanceinfo[inst].disk_template == diskless)
3227 disks = [(inst, disk)
3228 for inst in node_instances
3229 for disk in instanceinfo[inst].disks]
3232 # No need to collect data
3235 node_disks[nname] = disks
3237 # _AnnotateDiskParams already makes copies of the disks
3239 for (inst, dev) in disks:
3240 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3241 self.cfg.SetDiskID(anno_disk, nname)
3242 devonly.append(anno_disk)
3244 node_disks_devonly[nname] = devonly
3246 assert len(node_disks) == len(node_disks_devonly)
3248 # Collect data from all nodes with disks
3249 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3252 assert len(result) == len(node_disks)
3256 for (nname, nres) in result.items():
3257 disks = node_disks[nname]
3260 # No data from this node
3261 data = len(disks) * [(False, "node offline")]
3264 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3265 "while getting disk information: %s", msg)
3267 # No data from this node
3268 data = len(disks) * [(False, msg)]
3271 for idx, i in enumerate(nres.payload):
3272 if isinstance(i, (tuple, list)) and len(i) == 2:
3275 logging.warning("Invalid result from node %s, entry %d: %s",
3277 data.append((False, "Invalid result from the remote node"))
3279 for ((inst, _), status) in zip(disks, data):
3280 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3282 # Add empty entries for diskless instances.
3283 for inst in diskless_instances:
3284 assert inst not in instdisk
3287 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3288 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3289 compat.all(isinstance(s, (tuple, list)) and
3290 len(s) == 2 for s in statuses)
3291 for inst, nnames in instdisk.items()
3292 for nname, statuses in nnames.items())
3294 instdisk_keys = set(instdisk)
3295 instanceinfo_keys = set(instanceinfo)
3296 assert instdisk_keys == instanceinfo_keys, \
3297 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3298 (instdisk_keys, instanceinfo_keys))
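# Illustrative sketch of the returned mapping (hypothetical names):
#   instdisk == {"inst1.example.com":
#                  {"node1.example.com": [(True, <status disk/0>),
#                                         (False, "node offline")]}}
# i.e. per-instance, per-node lists of (success, payload) pairs, plus one
# empty dict per diskless instance.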
3303 def _SshNodeSelector(group_uuid, all_nodes):
3304 """Create endless iterators for all potential SSH check hosts.
3307 nodes = [node for node in all_nodes
3308 if (node.group != group_uuid and
3310 keyfunc = operator.attrgetter("group")
3312 return map(itertools.cycle,
3313 [sorted(map(operator.attrgetter("name"), names))
3314 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3318 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3319 """Choose which nodes should talk to which other nodes.
3321 We will make nodes contact all nodes in their group, and one node from every other group.
3324 @warning: This algorithm has a known issue if one node group is much
3325 smaller than others (e.g. just one node). In such a case all other
3326 nodes will talk to the single node.
3329 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3330 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3332 return (online_nodes,
3333 dict((name, sorted([i.next() for i in sel]))
3334 for name in online_nodes))
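# Illustrative worked example (hypothetical layout): with groups
#   g1 = [A, B] (being verified), g2 = [C], g3 = [D, E]
# every online node of g1 SSH-checks all of g1 plus one node drawn
# round-robin from each other group, e.g. A -> [C, D] and B -> [C, E].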
3336 def BuildHooksEnv(self):
3339 Cluster-Verify hooks run only in the post phase; when they fail, their
3340 output is logged in the verify output and the verification fails.
3344 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3347 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3348 for node in self.my_node_info.values())
3352 def BuildHooksNodes(self):
3353 """Build hooks nodes.
3356 return ([], self.my_node_names)
3358 def Exec(self, feedback_fn):
3359 """Verify integrity of the node group, performing various test on nodes.
3362 # This method has too many local variables. pylint: disable=R0914
3363 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3365 if not self.my_node_names:
3367 feedback_fn("* Empty node group, skipping verification")
3371 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3372 verbose = self.op.verbose
3373 self._feedback_fn = feedback_fn
3375 vg_name = self.cfg.GetVGName()
3376 drbd_helper = self.cfg.GetDRBDHelper()
3377 cluster = self.cfg.GetClusterInfo()
3378 hypervisors = cluster.enabled_hypervisors
3379 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3381 i_non_redundant = [] # Non redundant instances
3382 i_non_a_balanced = [] # Non auto-balanced instances
3383 i_offline = 0 # Count of offline instances
3384 n_offline = 0 # Count of offline nodes
3385 n_drained = 0 # Count of nodes being drained
3386 node_vol_should = {}
3388 # FIXME: verify OS list
3391 filemap = _ComputeAncillaryFiles(cluster, False)
3393 # do local checksums
3394 master_node = self.master_node = self.cfg.GetMasterNode()
3395 master_ip = self.cfg.GetMasterIP()
3397 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3400 if self.cfg.GetUseExternalMipScript():
3401 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3403 node_verify_param = {
3404 constants.NV_FILELIST:
3405 map(vcluster.MakeVirtualPath,
3406 utils.UniqueSequence(filename
3407 for files in filemap
3408 for filename in files)),
3409 constants.NV_NODELIST:
3410 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3411 self.all_node_info.values()),
3412 constants.NV_HYPERVISOR: hypervisors,
3413 constants.NV_HVPARAMS:
3414 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3415 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3416 for node in node_data_list
3417 if not node.offline],
3418 constants.NV_INSTANCELIST: hypervisors,
3419 constants.NV_VERSION: None,
3420 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3421 constants.NV_NODESETUP: None,
3422 constants.NV_TIME: None,
3423 constants.NV_MASTERIP: (master_node, master_ip),
3424 constants.NV_OSLIST: None,
3425 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3426 constants.NV_USERSCRIPTS: user_scripts,
3429 if vg_name is not None:
3430 node_verify_param[constants.NV_VGLIST] = None
3431 node_verify_param[constants.NV_LVLIST] = vg_name
3432 node_verify_param[constants.NV_PVLIST] = [vg_name]
3435 node_verify_param[constants.NV_DRBDLIST] = None
3436 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3438 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3439 # Load file storage paths only from master node
3440 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3443 # FIXME: this needs to be changed per node-group, not cluster-wide
3445 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3446 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(default_nicpp[constants.NIC_LINK])
3448 for instance in self.my_inst_info.values():
3449 for nic in instance.nics:
3450 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3451 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(full_nic[constants.NIC_LINK])
3455 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3457 # Build our expected cluster state
3458 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3460 vm_capable=node.vm_capable))
3461 for node in node_data_list)
3465 for node in self.all_node_info.values():
3466 path = _SupportsOob(self.cfg, node)
3467 if path and path not in oob_paths:
3468 oob_paths.append(path)
3471 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3473 for instance in self.my_inst_names:
3474 inst_config = self.my_inst_info[instance]
3475 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3478 for nname in inst_config.all_nodes:
3479 if nname not in node_image:
3480 gnode = self.NodeImage(name=nname)
3481 gnode.ghost = (nname not in self.all_node_info)
3482 node_image[nname] = gnode
3484 inst_config.MapLVsByNode(node_vol_should)
3486 pnode = inst_config.primary_node
3487 node_image[pnode].pinst.append(instance)
3489 for snode in inst_config.secondary_nodes:
3490 nimg = node_image[snode]
3491 nimg.sinst.append(instance)
3492 if pnode not in nimg.sbp:
3493 nimg.sbp[pnode] = []
3494 nimg.sbp[pnode].append(instance)
3496 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3497 # The value of exclusive_storage should be the same across the group, so if
3498 # it's True for at least one node, we act as if it were set for all nodes
3499 self._exclusive_storage = compat.any(es_flags.values())
3500 if self._exclusive_storage:
3501 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3503 # At this point, we have the in-memory data structures complete,
3504 # except for the runtime information, which we'll gather next
3506 # Due to the way our RPC system works, exact response times cannot be
3507 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3508 # time before and after executing the request, we can at least have a time window.
3510 nvinfo_starttime = time.time()
3511 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3513 self.cfg.GetClusterName())
3514 nvinfo_endtime = time.time()
3516 if self.extra_lv_nodes and vg_name is not None:
3518 self.rpc.call_node_verify(self.extra_lv_nodes,
3519 {constants.NV_LVLIST: vg_name},
3520 self.cfg.GetClusterName())
3522 extra_lv_nvinfo = {}
3524 all_drbd_map = self.cfg.ComputeDRBDMap()
3526 feedback_fn("* Gathering disk information (%s nodes)" %
3527 len(self.my_node_names))
3528 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3531 feedback_fn("* Verifying configuration file consistency")
3533 # If not all nodes are being checked, we need to make sure the master node
3534 # and a non-checked vm_capable node are in the list.
3535 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3537 vf_nvinfo = all_nvinfo.copy()
3538 vf_node_info = list(self.my_node_info.values())
3539 additional_nodes = []
3540 if master_node not in self.my_node_info:
3541 additional_nodes.append(master_node)
3542 vf_node_info.append(self.all_node_info[master_node])
3543 # Add the first vm_capable node we find which is not included,
3544 # excluding the master node (which we already have)
3545 for node in absent_nodes:
3546 nodeinfo = self.all_node_info[node]
3547 if (nodeinfo.vm_capable and not nodeinfo.offline and
3548 node != master_node):
3549 additional_nodes.append(node)
3550 vf_node_info.append(self.all_node_info[node])
3552 key = constants.NV_FILELIST
3553 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3554 {key: node_verify_param[key]},
3555 self.cfg.GetClusterName()))
3557 vf_nvinfo = all_nvinfo
3558 vf_node_info = self.my_node_info.values()
3560 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3562 feedback_fn("* Verifying node status")
3566 for node_i in node_data_list:
3568 nimg = node_image[node]
3572 feedback_fn("* Skipping offline node %s" % (node,))
3576 if node == master_node:
3578 elif node_i.master_candidate:
3579 ntype = "master candidate"
3580 elif node_i.drained:
3586 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3588 msg = all_nvinfo[node].fail_msg
3589 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3592 nimg.rpc_fail = True
3595 nresult = all_nvinfo[node].payload
3597 nimg.call_ok = self._VerifyNode(node_i, nresult)
3598 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3599 self._VerifyNodeNetwork(node_i, nresult)
3600 self._VerifyNodeUserScripts(node_i, nresult)
3601 self._VerifyOob(node_i, nresult)
3602 self._VerifyFileStoragePaths(node_i, nresult,
3603 node == master_node)
3606 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3607 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3610 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3611 self._UpdateNodeInstances(node_i, nresult, nimg)
3612 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3613 self._UpdateNodeOS(node_i, nresult, nimg)
3615 if not nimg.os_fail:
3616 if refos_img is None:
3618 self._VerifyNodeOS(node_i, nimg, refos_img)
3619 self._VerifyNodeBridges(node_i, nresult, bridges)
3621 # Check whether all running instances are primary for the node. (This
3622 # can no longer be done from _VerifyInstance below, since some of the
3623 # wrong instances could be from other node groups.)
3624 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3626 for inst in non_primary_inst:
3627 test = inst in self.all_inst_info
3628 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3629 "instance should not run on node %s", node_i.name)
3630 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3631 "node is running unknown instance %s", inst)
3633 self._VerifyGroupLVM(node_image, vg_name)
3635 for node, result in extra_lv_nvinfo.items():
3636 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3637 node_image[node], vg_name)
3639 feedback_fn("* Verifying instance status")
3640 for instance in self.my_inst_names:
3642 feedback_fn("* Verifying instance %s" % instance)
3643 inst_config = self.my_inst_info[instance]
3644 self._VerifyInstance(instance, inst_config, node_image,
3647 # If the instance is non-redundant we cannot survive losing its primary
3648 # node, so we are not N+1 compliant.
3649 if inst_config.disk_template not in constants.DTS_MIRRORED:
3650 i_non_redundant.append(instance)
3652 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3653 i_non_a_balanced.append(instance)
3655 feedback_fn("* Verifying orphan volumes")
3656 reserved = utils.FieldSet(*cluster.reserved_lvs)
3658 # We will get spurious "unknown volume" warnings if any node of this group
3659 # is secondary for an instance whose primary is in another group. To avoid
3660 # them, we find these instances and add their volumes to node_vol_should.
3661 for inst in self.all_inst_info.values():
3662 for secondary in inst.secondary_nodes:
3663 if (secondary in self.my_node_info
3664 and inst.name not in self.my_inst_info):
3665 inst.MapLVsByNode(node_vol_should)
3668 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3670 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3671 feedback_fn("* Verifying N+1 Memory redundancy")
3672 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3674 feedback_fn("* Other Notes")
3676 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3677 % len(i_non_redundant))
3679 if i_non_a_balanced:
3680 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3681 % len(i_non_a_balanced))
3684 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3687 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3690 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3694 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3695 """Analyze the post-hooks' result
3697 This method analyses the hook result, handles it, and sends some
3698 nicely-formatted feedback back to the user.
3700 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3701 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3702 @param hooks_results: the results of the multi-node hooks rpc call
3703 @param feedback_fn: function used to send feedback back to the caller
3704 @param lu_result: previous Exec result
3705 @return: the new Exec result, based on the previous result
3707 """
3709 # We only really run POST phase hooks, only for non-empty groups,
3710 # and are only interested in their results
3711 if not self.my_node_names:
3712 # empty node group
3713 pass
3714 elif phase == constants.HOOKS_PHASE_POST:
3715 # Used to change hooks' output to proper indentation
3716 feedback_fn("* Hooks Results")
3717 assert hooks_results, "invalid result from hooks"
3719 for node_name in hooks_results:
3720 res = hooks_results[node_name]
3721 msg = res.fail_msg
3722 test = msg and not res.offline
3723 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3724 "Communication failure in hooks execution: %s", msg)
3725 if res.offline or msg:
3726 # No need to investigate payload if node is offline or gave
3727 # an error message
3728 continue
3729 for script, hkr, output in res.payload:
3730 test = hkr == constants.HKR_FAIL
3731 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3732 "Script %s failed, output:", script)
3733 if test:
3734 output = self._HOOKS_INDENT_RE.sub(" ", output)
3735 feedback_fn("%s" % output)
3736 lu_result = False
3738 return lu_result
3741 class LUClusterVerifyDisks(NoHooksLU):
3742 """Verifies the cluster disks status.
3747 def ExpandNames(self):
3748 self.share_locks = _ShareAll()
3749 self.needed_locks = {
3750 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3751 }
3753 def Exec(self, feedback_fn):
3754 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3756 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3757 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3758 for group in group_names])
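# Editorial sketch (not in the original source): ResultWithJobs makes the
# processor submit one single-opcode job per node group, so with two
# hypothetical groups "default" and "storage" the return value above is
# equivalent to:
#
#   return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                          [opcodes.OpGroupVerifyDisks(group_name="storage")]])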
3761 class LUGroupVerifyDisks(NoHooksLU):
3762 """Verifies the status of all disks in a node group.
3767 def ExpandNames(self):
3768 # Raises errors.OpPrereqError on its own if group can't be found
3769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3771 self.share_locks = _ShareAll()
3772 self.needed_locks = {
3773 locking.LEVEL_INSTANCE: [],
3774 locking.LEVEL_NODEGROUP: [],
3775 locking.LEVEL_NODE: [],
3777 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3778 # starts one instance of this opcode for every group, which means all
3779 # nodes will be locked for a short amount of time, so it's better to
3780 # acquire the node allocation lock as well.
3781 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3782 }
3784 def DeclareLocks(self, level):
3785 if level == locking.LEVEL_INSTANCE:
3786 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3788 # Lock instances optimistically, needs verification once node and group
3789 # locks have been acquired
3790 self.needed_locks[locking.LEVEL_INSTANCE] = \
3791 self.cfg.GetNodeGroupInstances(self.group_uuid)
3793 elif level == locking.LEVEL_NODEGROUP:
3794 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3796 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3797 set([self.group_uuid] +
3798 # Lock all groups used by instances optimistically; this requires
3799 # going via the node before it's locked, requiring verification
3800 # later on
3801 [group_uuid
3802 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3803 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3805 elif level == locking.LEVEL_NODE:
3806 # This will only lock the nodes in the group to be verified which contain
3807 # actual instances
3808 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3809 self._LockInstancesNodes()
3811 # Lock all nodes in group to be verified
3812 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3813 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3814 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3816 def CheckPrereq(self):
3817 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3818 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3819 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3821 assert self.group_uuid in owned_groups
3823 # Check if locked instances are still correct
3824 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3826 # Get instance information
3827 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3829 # Check if node groups for locked instances are still correct
3830 _CheckInstancesNodeGroups(self.cfg, self.instances,
3831 owned_groups, owned_nodes, self.group_uuid)
3833 def Exec(self, feedback_fn):
3834 """Verify integrity of cluster disks.
3836 @rtype: tuple of three items
3837 @return: a tuple of (dict of node-to-node_error, list of instances
3838 which need activate-disks, dict of instance: (node, volume) for
3839 missing volumes
3841 """
3842 res_nodes = {}
3843 res_instances = set()
3844 res_missing = {}
3846 nv_dict = _MapInstanceDisksToNodes(
3847 [inst for inst in self.instances.values()
3848 if inst.admin_state == constants.ADMINST_UP])
3850 if nv_dict:
3851 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3852 set(self.cfg.GetVmCapableNodeList()))
3854 node_lvs = self.rpc.call_lv_list(nodes, [])
3856 for (node, node_res) in node_lvs.items():
3857 if node_res.offline:
3858 continue
3860 msg = node_res.fail_msg
3861 if msg:
3862 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3863 res_nodes[node] = msg
3864 continue
3866 for lv_name, (_, _, lv_online) in node_res.payload.items():
3867 inst = nv_dict.pop((node, lv_name), None)
3868 if not (lv_online or inst is None):
3869 res_instances.add(inst)
3871 # any leftover items in nv_dict are missing LVs, let's arrange the data
3872 # better
3873 for key, inst in nv_dict.iteritems():
3874 res_missing.setdefault(inst, []).append(list(key))
3876 return (res_nodes, list(res_instances), res_missing)
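# Editorial sketch (invented names): a caller of this LU would consume the
# tuple returned above roughly as follows:
#
#   (node_errors, offline_disk_instances, missing_disks) = result
#   # node_errors: {"node1": "rpc failure ..."} - per-node RPC error strings
#   # offline_disk_instances: ["instance1"] - need "activate-disks"
#   # missing_disks: {"instance2": [["node2", "xenvg/disk0"]]} - LVs not found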
3879 class LUClusterRepairDiskSizes(NoHooksLU):
3880 """Verifies the cluster disks sizes.
3885 def ExpandNames(self):
3886 if self.op.instances:
3887 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3888 # Not getting the node allocation lock as only a specific set of
3889 # instances (and their nodes) is going to be acquired
3890 self.needed_locks = {
3891 locking.LEVEL_NODE_RES: [],
3892 locking.LEVEL_INSTANCE: self.wanted_names,
3893 }
3894 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3895 else:
3896 self.wanted_names = None
3897 self.needed_locks = {
3898 locking.LEVEL_NODE_RES: locking.ALL_SET,
3899 locking.LEVEL_INSTANCE: locking.ALL_SET,
3901 # This opcode acquires the node locks for all instances
3902 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3903 }
3905 self.share_locks = {
3906 locking.LEVEL_NODE_RES: 1,
3907 locking.LEVEL_INSTANCE: 0,
3908 locking.LEVEL_NODE_ALLOC: 1,
3909 }
3911 def DeclareLocks(self, level):
3912 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3913 self._LockInstancesNodes(primary_only=True, level=level)
3915 def CheckPrereq(self):
3916 """Check prerequisites.
3918 This only checks the optional instance list against the existing names.
3920 """
3921 if self.wanted_names is None:
3922 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3924 self.wanted_instances = \
3925 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3927 def _EnsureChildSizes(self, disk):
3928 """Ensure children of the disk have the needed disk size.
3930 This is valid mainly for DRBD8 and fixes an issue where the
3931 children have smaller disk size.
3933 @param disk: an L{ganeti.objects.Disk} object
3935 """
3936 if disk.dev_type == constants.LD_DRBD8:
3937 assert disk.children, "Empty children for DRBD8?"
3938 fchild = disk.children[0]
3939 mismatch = fchild.size < disk.size
3940 if mismatch:
3941 self.LogInfo("Child disk has size %d, parent %d, fixing",
3942 fchild.size, disk.size)
3943 fchild.size = disk.size
3945 # and we recurse on this child only, not on the metadev
3946 return self._EnsureChildSizes(fchild) or mismatch
3947 else:
3948 return False
3950 def Exec(self, feedback_fn):
3951 """Verify the size of cluster disks.
3954 # TODO: check child disks too
3955 # TODO: check differences in size between primary/secondary nodes
3957 for instance in self.wanted_instances:
3958 pnode = instance.primary_node
3959 if pnode not in per_node_disks:
3960 per_node_disks[pnode] = []
3961 for idx, disk in enumerate(instance.disks):
3962 per_node_disks[pnode].append((instance, idx, disk))
3964 assert not (frozenset(per_node_disks.keys()) -
3965 self.owned_locks(locking.LEVEL_NODE_RES)), \
3966 "Not owning correct locks"
3967 assert not self.owned_locks(locking.LEVEL_NODE)
3969 changed = []
3970 for node, dskl in per_node_disks.items():
3971 newl = [v[2].Copy() for v in dskl]
3972 for dsk in newl:
3973 self.cfg.SetDiskID(dsk, node)
3974 result = self.rpc.call_blockdev_getsize(node, newl)
3975 if result.fail_msg:
3976 self.LogWarning("Failure in blockdev_getsize call to node"
3977 " %s, ignoring", node)
3978 continue
3979 if len(result.payload) != len(dskl):
3980 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3981 " result.payload=%s", node, len(dskl), result.payload)
3982 self.LogWarning("Invalid result from node %s, ignoring node results",
3983 node)
3984 continue
3985 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3987 self.LogWarning("Disk %d of instance %s did not return size"
3988 " information, ignoring", idx, instance.name)
3990 if not isinstance(size, (int, long)):
3991 self.LogWarning("Disk %d of instance %s did not return valid"
3992 " size information, ignoring", idx, instance.name)
3995 if size != disk.size:
3996 self.LogInfo("Disk %d of instance %s has mismatched size,"
3997 " correcting: recorded %d, actual %d", idx,
3998 instance.name, disk.size, size)
3999 disk.size = size
4000 self.cfg.Update(instance, feedback_fn)
4001 changed.append((instance.name, idx, size))
4002 if self._EnsureChildSizes(disk):
4003 self.cfg.Update(instance, feedback_fn)
4004 changed.append((instance.name, idx, disk.size))
4006 return changed
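# Editorial note: call_blockdev_getsize reports sizes in bytes while the
# configuration stores disk sizes in MiB, hence the "size >> 20" above.
# Worked example: a 10737418240-byte device yields 10737418240 >> 20 == 10240,
# which is then compared against disk.size (10240 for a 10 GiB disk).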
4008 class LUClusterRename(LogicalUnit):
4009 """Rename the cluster.
4012 HPATH = "cluster-rename"
4013 HTYPE = constants.HTYPE_CLUSTER
4015 def BuildHooksEnv(self):
4016 """Build hooks env.
4018 """
4019 return {
4020 "OP_TARGET": self.cfg.GetClusterName(),
4021 "NEW_NAME": self.op.name,
4022 }
4024 def BuildHooksNodes(self):
4025 """Build hooks nodes.
4028 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4030 def CheckPrereq(self):
4031 """Verify that the passed name is a valid one.
4034 hostname = netutils.GetHostname(name=self.op.name,
4035 family=self.cfg.GetPrimaryIPFamily())
4037 new_name = hostname.name
4038 self.ip = new_ip = hostname.ip
4039 old_name = self.cfg.GetClusterName()
4040 old_ip = self.cfg.GetMasterIP()
4041 if new_name == old_name and new_ip == old_ip:
4042 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4043 " cluster has changed",
4045 if new_ip != old_ip:
4046 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4047 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4048 " reachable on the network" %
4049 new_ip, errors.ECODE_NOTUNIQUE)
4051 self.op.name = new_name
4053 def Exec(self, feedback_fn):
4054 """Rename the cluster.
4057 clustername = self.op.name
4060 # shutdown the master IP
4061 master_params = self.cfg.GetMasterNetworkParameters()
4062 ems = self.cfg.GetUseExternalMipScript()
4063 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4064 master_params, ems)
4065 result.Raise("Could not disable the master role")
4067 try:
4068 cluster = self.cfg.GetClusterInfo()
4069 cluster.cluster_name = clustername
4070 cluster.master_ip = new_ip
4071 self.cfg.Update(cluster, feedback_fn)
4073 # update the known hosts file
4074 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4075 node_list = self.cfg.GetOnlineNodeList()
4076 try:
4077 node_list.remove(master_params.name)
4078 except ValueError:
4079 pass
4080 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4081 finally:
4082 master_params.ip = new_ip
4083 result = self.rpc.call_node_activate_master_ip(master_params.name,
4084 master_params, ems)
4085 msg = result.fail_msg
4086 if msg:
4087 self.LogWarning("Could not re-enable the master role on"
4088 " the master, please restart manually: %s", msg)
4090 return clustername
4093 def _ValidateNetmask(cfg, netmask):
4094 """Checks if a netmask is valid.
4096 @type cfg: L{config.ConfigWriter}
4097 @param cfg: The cluster configuration
4098 @type netmask: int
4099 @param netmask: the netmask to be verified
4100 @raise errors.OpPrereqError: if the validation fails
4102 """
4103 ip_family = cfg.GetPrimaryIPFamily()
4104 try:
4105 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4106 except errors.ProgrammerError:
4107 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4108 ip_family, errors.ECODE_INVAL)
4109 if not ipcls.ValidateNetmask(netmask):
4110 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4111 (netmask), errors.ECODE_INVAL)
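# Hedged usage sketch (assuming an IPv4 cluster, where netmasks are prefix
# lengths): _ValidateNetmask(cfg, 24) returns silently, while
# _ValidateNetmask(cfg, 33) raises errors.OpPrereqError because the IPv4
# address class rejects prefixes outside the 0..32 range.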
4114 class LUClusterSetParams(LogicalUnit):
4115 """Change the parameters of the cluster.
4118 HPATH = "cluster-modify"
4119 HTYPE = constants.HTYPE_CLUSTER
4120 REQ_BGL = False
4122 def CheckArguments(self):
4123 """Check parameters
4125 """
4126 if self.op.uid_pool:
4127 uidpool.CheckUidPool(self.op.uid_pool)
4129 if self.op.add_uids:
4130 uidpool.CheckUidPool(self.op.add_uids)
4132 if self.op.remove_uids:
4133 uidpool.CheckUidPool(self.op.remove_uids)
4135 if self.op.master_netmask is not None:
4136 _ValidateNetmask(self.cfg, self.op.master_netmask)
4138 if self.op.diskparams:
4139 for dt_params in self.op.diskparams.values():
4140 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4141 try:
4142 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4143 except errors.OpPrereqError, err:
4144 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4145 errors.ECODE_INVAL)
4147 def ExpandNames(self):
4148 # FIXME: in the future maybe other cluster params won't require checking on
4149 # all nodes to be modified.
4150 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4151 # resource locks the right thing, shouldn't it be the BGL instead?
4152 self.needed_locks = {
4153 locking.LEVEL_NODE: locking.ALL_SET,
4154 locking.LEVEL_INSTANCE: locking.ALL_SET,
4155 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4156 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4157 }
4158 self.share_locks = _ShareAll()
4160 def BuildHooksEnv(self):
4161 """Build hooks env.
4163 """
4164 return {
4165 "OP_TARGET": self.cfg.GetClusterName(),
4166 "NEW_VG_NAME": self.op.vg_name,
4167 }
4169 def BuildHooksNodes(self):
4170 """Build hooks nodes.
4173 mn = self.cfg.GetMasterNode()
4176 def CheckPrereq(self):
4177 """Check prerequisites.
4179 This checks whether the given params don't conflict and
4180 if the given volume group is valid.
4182 """
4183 if self.op.vg_name is not None and not self.op.vg_name:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4185 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4186 " instances exist", errors.ECODE_INVAL)
4188 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4189 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4190 raise errors.OpPrereqError("Cannot disable drbd helper while"
4191 " drbd-based instances exist",
4194 node_list = self.owned_locks(locking.LEVEL_NODE)
4196 # if vg_name not None, checks given volume group on all nodes
4197 if self.op.vg_name:
4198 vglist = self.rpc.call_vg_list(node_list)
4199 for node in node_list:
4200 msg = vglist[node].fail_msg
4201 if msg:
4202 # ignoring down node
4203 self.LogWarning("Error while gathering data on node %s"
4204 " (ignoring node): %s", node, msg)
4205 continue
4206 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4207 self.op.vg_name,
4208 constants.MIN_VG_SIZE)
4209 if vgstatus:
4210 raise errors.OpPrereqError("Error on node '%s': %s" %
4211 (node, vgstatus), errors.ECODE_ENVIRON)
4213 if self.op.drbd_helper:
4214 # checks given drbd helper on all nodes
4215 helpers = self.rpc.call_drbd_helper(node_list)
4216 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4217 if ninfo.offline:
4218 self.LogInfo("Not checking drbd helper on offline node %s", node)
4219 continue
4220 msg = helpers[node].fail_msg
4221 if msg:
4222 raise errors.OpPrereqError("Error checking drbd helper on node"
4223 " '%s': %s" % (node, msg),
4224 errors.ECODE_ENVIRON)
4225 node_helper = helpers[node].payload
4226 if node_helper != self.op.drbd_helper:
4227 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4228 (node, node_helper), errors.ECODE_ENVIRON)
4230 self.cluster = cluster = self.cfg.GetClusterInfo()
4231 # validate params changes
4232 if self.op.beparams:
4233 objects.UpgradeBeParams(self.op.beparams)
4234 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4235 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4237 if self.op.ndparams:
4238 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4239 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4241 # TODO: we need a more general way to handle resetting
4242 # cluster-level parameters to default values
4243 if self.new_ndparams["oob_program"] == "":
4244 self.new_ndparams["oob_program"] = \
4245 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4247 if self.op.hv_state:
4248 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4249 self.cluster.hv_state_static)
4250 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4251 for hv, values in new_hv_state.items())
4253 if self.op.disk_state:
4254 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4255 self.cluster.disk_state_static)
4256 self.new_disk_state = \
4257 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4258 for name, values in svalues.items()))
4259 for storage, svalues in new_disk_state.items())
4261 if self.op.ipolicy:
4262 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4263 group_policy=False)
4265 all_instances = self.cfg.GetAllInstancesInfo().values()
4266 violations = set()
4267 for group in self.cfg.GetAllNodeGroupsInfo().values():
4268 instances = frozenset([inst for inst in all_instances
4269 if compat.any(node in group.members
4270 for node in inst.all_nodes)])
4271 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4272 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4273 new = _ComputeNewInstanceViolations(ipol,
4274 new_ipolicy, instances)
4276 violations.update(new)
4278 if violations:
4279 self.LogWarning("After the ipolicy change the following instances"
4280 " violate them: %s",
4281 utils.CommaJoin(utils.NiceSort(violations)))
4283 if self.op.nicparams:
4284 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4285 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4286 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4287 nic_errors = []
4289 # check all instances for consistency
4290 for instance in self.cfg.GetAllInstancesInfo().values():
4291 for nic_idx, nic in enumerate(instance.nics):
4292 params_copy = copy.deepcopy(nic.nicparams)
4293 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4295 # check parameter syntax
4296 try:
4297 objects.NIC.CheckParameterSyntax(params_filled)
4298 except errors.ConfigurationError, err:
4299 nic_errors.append("Instance %s, nic/%d: %s" %
4300 (instance.name, nic_idx, err))
4302 # if we're moving instances to routed, check that they have an ip
4303 target_mode = params_filled[constants.NIC_MODE]
4304 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4305 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4306 " address" % (instance.name, nic_idx))
4308 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4309 "\n".join(nic_errors), errors.ECODE_INVAL)
4311 # hypervisor list/parameters
4312 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4313 if self.op.hvparams:
4314 for hv_name, hv_dict in self.op.hvparams.items():
4315 if hv_name not in self.new_hvparams:
4316 self.new_hvparams[hv_name] = hv_dict
4317 else:
4318 self.new_hvparams[hv_name].update(hv_dict)
4320 # disk template parameters
4321 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4322 if self.op.diskparams:
4323 for dt_name, dt_params in self.op.diskparams.items():
4324 if dt_name not in self.new_diskparams:
4325 self.new_diskparams[dt_name] = dt_params
4326 else:
4327 self.new_diskparams[dt_name].update(dt_params)
4329 # os hypervisor parameters
4330 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4331 if self.op.os_hvp:
4332 for os_name, hvs in self.op.os_hvp.items():
4333 if os_name not in self.new_os_hvp:
4334 self.new_os_hvp[os_name] = hvs
4335 else:
4336 for hv_name, hv_dict in hvs.items():
4337 if hv_dict is None:
4338 # Delete if it exists
4339 self.new_os_hvp[os_name].pop(hv_name, None)
4340 elif hv_name not in self.new_os_hvp[os_name]:
4341 self.new_os_hvp[os_name][hv_name] = hv_dict
4342 else:
4343 self.new_os_hvp[os_name][hv_name].update(hv_dict)
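# Editorial sketch of the merge semantics above (hypothetical values):
# with cluster.os_hvp == {"lenny": {"xen-pvm": {"kernel_path": "/boot/k"}}}
# and self.op.os_hvp == {"lenny": {"xen-pvm": {"root_path": "/dev/xvda"}}},
# new_os_hvp["lenny"]["xen-pvm"] ends up as
# {"kernel_path": "/boot/k", "root_path": "/dev/xvda"}; passing None as a
# hypervisor's dict instead deletes that hypervisor's per-OS overrides.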
4345 # os parameters
4346 self.new_osp = objects.FillDict(cluster.osparams, {})
4347 if self.op.osparams:
4348 for os_name, osp in self.op.osparams.items():
4349 if os_name not in self.new_osp:
4350 self.new_osp[os_name] = {}
4352 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4353 use_none=True)
4355 if not self.new_osp[os_name]:
4356 # we removed all parameters
4357 del self.new_osp[os_name]
4358 else:
4359 # check the parameter validity (remote check)
4360 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4361 os_name, self.new_osp[os_name])
4363 # changes to the hypervisor list
4364 if self.op.enabled_hypervisors is not None:
4365 self.hv_list = self.op.enabled_hypervisors
4366 for hv in self.hv_list:
4367 # if the hypervisor doesn't already exist in the cluster
4368 # hvparams, we initialize it to empty, and then (in both
4369 # cases) we make sure to fill the defaults, as we might not
4370 # have a complete defaults list if the hypervisor wasn't
4371 # enabled before
4372 if hv not in new_hvp:
4373 new_hvp[hv] = {}
4374 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4375 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4376 else:
4377 self.hv_list = cluster.enabled_hypervisors
4379 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4380 # either the enabled list has changed, or the parameters have, validate
4381 for hv_name, hv_params in self.new_hvparams.items():
4382 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4383 (self.op.enabled_hypervisors and
4384 hv_name in self.op.enabled_hypervisors)):
4385 # either this is a new hypervisor, or its parameters have changed
4386 hv_class = hypervisor.GetHypervisorClass(hv_name)
4387 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4388 hv_class.CheckParameterSyntax(hv_params)
4389 _CheckHVParams(self, node_list, hv_name, hv_params)
4391 if self.op.os_hvp:
4392 # no need to check any newly-enabled hypervisors, since the
4393 # defaults have already been checked in the above code-block
4394 for os_name, os_hvp in self.new_os_hvp.items():
4395 for hv_name, hv_params in os_hvp.items():
4396 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4397 # we need to fill in the new os_hvp on top of the actual hv_p
4398 cluster_defaults = self.new_hvparams.get(hv_name, {})
4399 new_osp = objects.FillDict(cluster_defaults, hv_params)
4400 hv_class = hypervisor.GetHypervisorClass(hv_name)
4401 hv_class.CheckParameterSyntax(new_osp)
4402 _CheckHVParams(self, node_list, hv_name, new_osp)
4404 if self.op.default_iallocator:
4405 alloc_script = utils.FindFile(self.op.default_iallocator,
4406 constants.IALLOCATOR_SEARCH_PATH,
4407 os.path.isfile)
4408 if alloc_script is None:
4409 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4410 " specified" % self.op.default_iallocator,
4411 errors.ECODE_INVAL)
4413 def Exec(self, feedback_fn):
4414 """Change the parameters of the cluster.
4417 if self.op.vg_name is not None:
4418 new_volume = self.op.vg_name
4419 if not new_volume:
4420 new_volume = None
4421 if new_volume != self.cfg.GetVGName():
4422 self.cfg.SetVGName(new_volume)
4423 else:
4424 feedback_fn("Cluster LVM configuration already in desired"
4425 " state, not changing")
4426 if self.op.drbd_helper is not None:
4427 new_helper = self.op.drbd_helper
4428 if not new_helper:
4429 new_helper = None
4430 if new_helper != self.cfg.GetDRBDHelper():
4431 self.cfg.SetDRBDHelper(new_helper)
4432 else:
4433 feedback_fn("Cluster DRBD helper already in desired state,"
4434 " not changing")
4435 if self.op.hvparams:
4436 self.cluster.hvparams = self.new_hvparams
4437 if self.op.os_hvp:
4438 self.cluster.os_hvp = self.new_os_hvp
4439 if self.op.enabled_hypervisors is not None:
4440 self.cluster.hvparams = self.new_hvparams
4441 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4442 if self.op.beparams:
4443 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4444 if self.op.nicparams:
4445 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4446 if self.op.ipolicy:
4447 self.cluster.ipolicy = self.new_ipolicy
4448 if self.op.osparams:
4449 self.cluster.osparams = self.new_osp
4450 if self.op.ndparams:
4451 self.cluster.ndparams = self.new_ndparams
4452 if self.op.diskparams:
4453 self.cluster.diskparams = self.new_diskparams
4454 if self.op.hv_state:
4455 self.cluster.hv_state_static = self.new_hv_state
4456 if self.op.disk_state:
4457 self.cluster.disk_state_static = self.new_disk_state
4459 if self.op.candidate_pool_size is not None:
4460 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4461 # we need to update the pool size here, otherwise the save will fail
4462 _AdjustCandidatePool(self, [])
4464 if self.op.maintain_node_health is not None:
4465 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4466 feedback_fn("Note: CONFD was disabled at build time, node health"
4467 " maintenance is not useful (still enabling it)")
4468 self.cluster.maintain_node_health = self.op.maintain_node_health
4470 if self.op.prealloc_wipe_disks is not None:
4471 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4473 if self.op.add_uids is not None:
4474 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4476 if self.op.remove_uids is not None:
4477 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4479 if self.op.uid_pool is not None:
4480 self.cluster.uid_pool = self.op.uid_pool
4482 if self.op.default_iallocator is not None:
4483 self.cluster.default_iallocator = self.op.default_iallocator
4485 if self.op.reserved_lvs is not None:
4486 self.cluster.reserved_lvs = self.op.reserved_lvs
4488 if self.op.use_external_mip_script is not None:
4489 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4491 def helper_os(aname, mods, desc):
4492 desc += " OS list"
4493 lst = getattr(self.cluster, aname)
4494 for key, val in mods:
4495 if key == constants.DDM_ADD:
4496 if val in lst:
4497 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4498 else:
4499 lst.append(val)
4500 elif key == constants.DDM_REMOVE:
4501 if val in lst:
4502 lst.remove(val)
4503 else:
4504 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4505 else:
4506 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4508 if self.op.hidden_os:
4509 helper_os("hidden_os", self.op.hidden_os, "hidden")
4511 if self.op.blacklisted_os:
4512 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4514 if self.op.master_netdev:
4515 master_params = self.cfg.GetMasterNetworkParameters()
4516 ems = self.cfg.GetUseExternalMipScript()
4517 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4518 self.cluster.master_netdev)
4519 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4520 master_params, ems)
4521 result.Raise("Could not disable the master ip")
4522 feedback_fn("Changing master_netdev from %s to %s" %
4523 (master_params.netdev, self.op.master_netdev))
4524 self.cluster.master_netdev = self.op.master_netdev
4526 if self.op.master_netmask:
4527 master_params = self.cfg.GetMasterNetworkParameters()
4528 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4529 result = self.rpc.call_node_change_master_netmask(master_params.name,
4530 master_params.netmask,
4531 self.op.master_netmask,
4532 master_params.ip,
4533 master_params.netdev)
4534 if result.fail_msg:
4535 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4536 feedback_fn(msg)
4538 self.cluster.master_netmask = self.op.master_netmask
4540 self.cfg.Update(self.cluster, feedback_fn)
4542 if self.op.master_netdev:
4543 master_params = self.cfg.GetMasterNetworkParameters()
4544 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4545 self.op.master_netdev)
4546 ems = self.cfg.GetUseExternalMipScript()
4547 result = self.rpc.call_node_activate_master_ip(master_params.name,
4548 master_params, ems)
4549 if result.fail_msg:
4550 self.LogWarning("Could not re-enable the master ip on"
4551 " the master, please restart manually: %s",
4552 result.fail_msg)
4555 def _UploadHelper(lu, nodes, fname):
4556 """Helper for uploading a file and showing warnings.
4559 if os.path.exists(fname):
4560 result = lu.rpc.call_upload_file(nodes, fname)
4561 for to_node, to_result in result.items():
4562 msg = to_result.fail_msg
4564 msg = ("Copy of file %s to node %s failed: %s" %
4565 (fname, to_node, msg))
4569 def _ComputeAncillaryFiles(cluster, redist):
4570 """Compute files external to Ganeti which need to be consistent.
4572 @type redist: boolean
4573 @param redist: Whether to include files which need to be redistributed
4575 """
4576 # Compute files for all nodes
4577 files_all = set([
4578 pathutils.SSH_KNOWN_HOSTS_FILE,
4579 pathutils.CONFD_HMAC_KEY,
4580 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4581 pathutils.SPICE_CERT_FILE,
4582 pathutils.SPICE_CACERT_FILE,
4583 pathutils.RAPI_USERS_FILE,
4584 ])
4586 if redist:
4587 # we need to ship at least the RAPI certificate
4588 files_all.add(pathutils.RAPI_CERT_FILE)
4589 else:
4590 files_all.update(pathutils.ALL_CERT_FILES)
4591 files_all.update(ssconf.SimpleStore().GetFileList())
4593 if cluster.modify_etc_hosts:
4594 files_all.add(pathutils.ETC_HOSTS)
4596 if cluster.use_external_mip_script:
4597 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4599 # Files which are optional, these must:
4600 # - be present in one other category as well
4601 # - either exist or not exist on all nodes of that category (mc, vm all)
4602 files_opt = set([
4603 pathutils.RAPI_USERS_FILE,
4604 ])
4606 # Files which should only be on master candidates
4607 files_mc = set()
4609 if not redist:
4610 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4612 # File storage
4613 if (not redist and
4614 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4615 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4616 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4618 # Files which should only be on VM-capable nodes
4619 files_vm = set(
4620 filename
4621 for hv_name in cluster.enabled_hypervisors
4622 for filename in
4623 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4625 files_opt |= set(
4626 filename
4627 for hv_name in cluster.enabled_hypervisors
4628 for filename in
4629 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4631 # Filenames in each category must be unique
4632 all_files_set = files_all | files_mc | files_vm
4633 assert (len(all_files_set) ==
4634 sum(map(len, [files_all, files_mc, files_vm]))), \
4635 "Found file listed in more than one file list"
4637 # Optional files must be present in one other category
4638 assert all_files_set.issuperset(files_opt), \
4639 "Optional file not in a different required list"
4641 # This one file should never ever be re-distributed via RPC
4642 assert not (redist and
4643 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4645 return (files_all, files_opt, files_mc, files_vm)
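# Hedged usage sketch: callers unpack the four categories computed above,
# e.g. (as _RedistributeAncillaryFiles below does for redistribution):
#
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
#
# With redist=True the cluster configuration file is never part of any
# category, which the preceding assertion enforces.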
4648 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4649 """Distribute additional files which are part of the cluster configuration.
4651 ConfigWriter takes care of distributing the config and ssconf files, but
4652 there are more files which should be distributed to all nodes. This function
4653 makes sure those are copied.
4655 @param lu: calling logical unit
4656 @param additional_nodes: list of nodes not in the config to distribute to
4657 @type additional_vm: boolean
4658 @param additional_vm: whether the additional nodes are vm-capable or not
4660 """
4661 # Gather target nodes
4662 cluster = lu.cfg.GetClusterInfo()
4663 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4665 online_nodes = lu.cfg.GetOnlineNodeList()
4666 online_set = frozenset(online_nodes)
4667 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4669 if additional_nodes is not None:
4670 online_nodes.extend(additional_nodes)
4671 if additional_vm:
4672 vm_nodes.extend(additional_nodes)
4674 # Never distribute to master node
4675 for nodelist in [online_nodes, vm_nodes]:
4676 if master_info.name in nodelist:
4677 nodelist.remove(master_info.name)
4680 (files_all, _, files_mc, files_vm) = \
4681 _ComputeAncillaryFiles(cluster, True)
4683 # Never re-distribute configuration file from here
4684 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4685 pathutils.CLUSTER_CONF_FILE in files_vm)
4686 assert not files_mc, "Master candidates not handled in this function"
4688 filemap = [
4689 (online_nodes, files_all),
4690 (vm_nodes, files_vm),
4691 ]
4693 # Upload the files
4694 for (node_list, files) in filemap:
4695 for fname in files:
4696 _UploadHelper(lu, node_list, fname)
4699 class LUClusterRedistConf(NoHooksLU):
4700 """Force the redistribution of cluster configuration.
4702 This is a very simple LU.
4704 """
4705 REQ_BGL = False
4707 def ExpandNames(self):
4708 self.needed_locks = {
4709 locking.LEVEL_NODE: locking.ALL_SET,
4710 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4711 }
4712 self.share_locks = _ShareAll()
4714 def Exec(self, feedback_fn):
4715 """Redistribute the configuration.
4718 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4719 _RedistributeAncillaryFiles(self)
4722 class LUClusterActivateMasterIp(NoHooksLU):
4723 """Activate the master IP on the master node.
4726 def Exec(self, feedback_fn):
4727 """Activate the master IP.
4730 master_params = self.cfg.GetMasterNetworkParameters()
4731 ems = self.cfg.GetUseExternalMipScript()
4732 result = self.rpc.call_node_activate_master_ip(master_params.name,
4734 result.Raise("Could not activate the master IP")
4737 class LUClusterDeactivateMasterIp(NoHooksLU):
4738 """Deactivate the master IP on the master node.
4741 def Exec(self, feedback_fn):
4742 """Deactivate the master IP.
4745 master_params = self.cfg.GetMasterNetworkParameters()
4746 ems = self.cfg.GetUseExternalMipScript()
4747 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4749 result.Raise("Could not deactivate the master IP")
4752 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4753 """Sleep and poll for an instance's disk to sync.
4756 if not instance.disks or disks is not None and not disks:
4759 disks = _ExpandCheckDisks(instance, disks)
4762 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4764 node = instance.primary_node
4767 lu.cfg.SetDiskID(dev, node)
4769 # TODO: Convert to utils.Retry
4771 retries = 0
4772 degr_retries = 10 # in seconds, as we sleep 1 second each time
4773 while True:
4774 max_time = 0
4775 done = True
4776 cumul_degraded = False
4777 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4778 msg = rstats.fail_msg
4779 if msg:
4780 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4781 retries += 1
4782 if retries >= 10:
4783 raise errors.RemoteError("Can't contact node %s for mirror data,"
4784 " aborting." % node)
4785 time.sleep(6)
4786 continue
4787 rstats = rstats.payload
4789 for i, mstat in enumerate(rstats):
4790 if mstat is None:
4791 lu.LogWarning("Can't compute data for node %s/%s",
4792 node, disks[i].iv_name)
4793 continue
4795 cumul_degraded = (cumul_degraded or
4796 (mstat.is_degraded and mstat.sync_percent is None))
4797 if mstat.sync_percent is not None:
4798 done = False
4799 if mstat.estimated_time is not None:
4800 rem_time = ("%s remaining (estimated)" %
4801 utils.FormatSeconds(mstat.estimated_time))
4802 max_time = mstat.estimated_time
4803 else:
4804 rem_time = "no time estimate"
4805 lu.LogInfo("- device %s: %5.2f%% done, %s",
4806 disks[i].iv_name, mstat.sync_percent, rem_time)
4808 # if we're done but degraded, let's do a few small retries, to
4809 # make sure we see a stable and not transient situation; therefore
4810 # we force restart of the loop
4811 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4812 logging.info("Degraded disks found, %d retries left", degr_retries)
4813 degr_retries -= 1
4814 time.sleep(1)
4815 continue
4817 if done or oneshot:
4818 break
4820 time.sleep(min(60, max_time))
4822 if not oneshot:
4823 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4825 return not cumul_degraded
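# Hedged usage sketch (not a quote of a real caller): an LU typically waits
# for DRBD resync after creating disks and treats a persistently degraded
# mirror as an error:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)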
4828 def _BlockdevFind(lu, node, dev, instance):
4829 """Wrapper around call_blockdev_find to annotate diskparams.
4831 @param lu: A reference to the lu object
4832 @param node: The node to call out
4833 @param dev: The device to find
4834 @param instance: The instance object the device belongs to
4835 @return: The result of the rpc call
4837 """
4838 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4839 return lu.rpc.call_blockdev_find(node, disk)
4842 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4843 """Wrapper around L{_CheckDiskConsistencyInner}.
4846 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4847 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4848 ldisk=ldisk)
4851 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4852 ldisk=False):
4853 """Check that mirrors are not degraded.
4855 @attention: The device has to be annotated already.
4857 The ldisk parameter, if True, will change the test from the
4858 is_degraded attribute (which represents overall non-ok status for
4859 the device(s)) to the ldisk (representing the local storage status).
4861 """
4862 lu.cfg.SetDiskID(dev, node)
4864 result = True
4866 if on_primary or dev.AssembleOnSecondary():
4867 rstats = lu.rpc.call_blockdev_find(node, dev)
4868 msg = rstats.fail_msg
4869 if msg:
4870 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4871 result = False
4872 elif not rstats.payload:
4873 lu.LogWarning("Can't find disk on node %s", node)
4874 result = False
4875 else:
4876 if ldisk:
4877 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4878 else:
4879 result = result and not rstats.payload.is_degraded
4881 if dev.children:
4882 for child in dev.children:
4883 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 on_primary)
4886 return result
4889 class LUOobCommand(NoHooksLU):
4890 """Logical unit for OOB handling.
4894 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4896 def ExpandNames(self):
4897 """Gather locks we need.
4900 if self.op.node_names:
4901 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4902 lock_names = self.op.node_names
4904 lock_names = locking.ALL_SET
4906 self.needed_locks = {
4907 locking.LEVEL_NODE: lock_names,
4908 }
4910 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4912 if not self.op.node_names:
4913 # Acquire node allocation lock only if all nodes are affected
4914 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4916 def CheckPrereq(self):
4917 """Check prerequisites.
4920 - the node exists in the configuration
4923 Any errors are signaled by raising errors.OpPrereqError.
4927 self.master_node = self.cfg.GetMasterNode()
4929 assert self.op.power_delay >= 0.0
4931 if self.op.node_names:
4932 if (self.op.command in self._SKIP_MASTER and
4933 self.master_node in self.op.node_names):
4934 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4935 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4937 if master_oob_handler:
4938 additional_text = ("run '%s %s %s' if you want to operate on the"
4939 " master regardless") % (master_oob_handler,
4943 additional_text = "it does not support out-of-band operations"
4945 raise errors.OpPrereqError(("Operating on the master node %s is not"
4946 " allowed for %s; %s") %
4947 (self.master_node, self.op.command,
4948 additional_text), errors.ECODE_INVAL)
4949 else:
4950 self.op.node_names = self.cfg.GetNodeList()
4951 if self.op.command in self._SKIP_MASTER:
4952 self.op.node_names.remove(self.master_node)
4954 if self.op.command in self._SKIP_MASTER:
4955 assert self.master_node not in self.op.node_names
4957 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4958 if node is None:
4959 raise errors.OpPrereqError("Node %s not found" % node_name,
4960 errors.ECODE_NOENT)
4961 else:
4962 self.nodes.append(node)
4964 if (not self.op.ignore_status and
4965 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4966 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4967 " not marked offline") % node_name,
4970 def Exec(self, feedback_fn):
4971 """Execute OOB and return result if we expect any.
4974 master_node = self.master_node
4977 for idx, node in enumerate(utils.NiceSort(self.nodes,
4978 key=lambda node: node.name)):
4979 node_entry = [(constants.RS_NORMAL, node.name)]
4980 ret.append(node_entry)
4982 oob_program = _SupportsOob(self.cfg, node)
4984 if not oob_program:
4985 node_entry.append((constants.RS_UNAVAIL, None))
4986 continue
4988 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4989 self.op.command, oob_program, node.name)
4990 result = self.rpc.call_run_oob(master_node, oob_program,
4991 self.op.command, node.name,
4992 self.op.timeout)
4994 if result.fail_msg:
4995 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4996 node.name, result.fail_msg)
4997 node_entry.append((constants.RS_NODATA, None))
4998 else:
4999 try:
5000 self._CheckPayload(result)
5001 except errors.OpExecError, err:
5002 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5004 node_entry.append((constants.RS_NODATA, None))
5006 if self.op.command == constants.OOB_HEALTH:
5007 # For health we should log important events
5008 for item, status in result.payload:
5009 if status in [constants.OOB_STATUS_WARNING,
5010 constants.OOB_STATUS_CRITICAL]:
5011 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5012 item, node.name, status)
5014 if self.op.command == constants.OOB_POWER_ON:
5015 node.powered = True
5016 elif self.op.command == constants.OOB_POWER_OFF:
5017 node.powered = False
5018 elif self.op.command == constants.OOB_POWER_STATUS:
5019 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5020 if powered != node.powered:
5021 logging.warning(("Recorded power state (%s) of node '%s' does not"
5022 " match actual power state (%s)"), node.powered,
5025 # For configuration changing commands we should update the node
5026 if self.op.command in (constants.OOB_POWER_ON,
5027 constants.OOB_POWER_OFF):
5028 self.cfg.Update(node, feedback_fn)
5030 node_entry.append((constants.RS_NORMAL, result.payload))
5032 if (self.op.command == constants.OOB_POWER_ON and
5033 idx < len(self.nodes) - 1):
5034 time.sleep(self.op.power_delay)
5036 return ret
5038 def _CheckPayload(self, result):
5039 """Checks if the payload is valid.
5041 @param result: RPC result
5042 @raises errors.OpExecError: If payload is not valid
5044 """
5045 errs = []
5046 if self.op.command == constants.OOB_HEALTH:
5047 if not isinstance(result.payload, list):
5048 errs.append("command 'health' is expected to return a list but got %s" %
5049 type(result.payload))
5050 else:
5051 for item, status in result.payload:
5052 if status not in constants.OOB_STATUSES:
5053 errs.append("health item '%s' has invalid status '%s'" %
5056 if self.op.command == constants.OOB_POWER_STATUS:
5057 if not isinstance(result.payload, dict):
5058 errs.append("power-status is expected to return a dict but got %s" %
5059 type(result.payload))
5061 if self.op.command in [
5062 constants.OOB_POWER_ON,
5063 constants.OOB_POWER_OFF,
5064 constants.OOB_POWER_CYCLE,
5065 ]:
5066 if result.payload is not None:
5067 errs.append("%s is expected to not return payload but got '%s'" %
5068 (self.op.command, result.payload))
5070 if errs:
5071 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5072 utils.CommaJoin(errs))
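# Editorial sketch of the payload shapes validated above (values invented):
#   OOB_HEALTH       -> list of (item, status) pairs, e.g.
#                       [("PSU0", constants.OOB_STATUS_OK)]
#   OOB_POWER_STATUS -> dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)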
5075 class _OsQuery(_QueryBase):
5076 FIELDS = query.OS_FIELDS
5078 def ExpandNames(self, lu):
5079 # Lock all nodes in shared mode
5080 # Temporary removal of locks, should be reverted later
5081 # TODO: reintroduce locks when they are lighter-weight
5082 lu.needed_locks = {}
5083 #self.share_locks[locking.LEVEL_NODE] = 1
5084 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5086 # The following variables interact with _QueryBase._GetNames
5087 if self.names:
5088 self.wanted = self.names
5089 else:
5090 self.wanted = locking.ALL_SET
5092 self.do_locking = self.use_locking
5094 def DeclareLocks(self, lu, level):
5095 pass
5097 @staticmethod
5098 def _DiagnoseByOS(rlist):
5099 """Remaps a per-node return list into an a per-os per-node dictionary
5101 @param rlist: a map with node names as keys and OS objects as values
5104 @return: a dictionary with osnames as keys and as value another
5105 map, with nodes as keys and tuples of (path, status, diagnose,
5106 variants, parameters, api_versions) as values, eg::
5108 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5109 (/srv/..., False, "invalid api")],
5110 "node2": [(/srv/..., True, "", [], [])]}
5115 # we build here the list of nodes that didn't fail the RPC (at RPC
5116 # level), so that nodes with a non-responding node daemon don't
5117 # make all OSes invalid
5118 good_nodes = [node_name for node_name in rlist
5119 if not rlist[node_name].fail_msg]
5120 for node_name, nr in rlist.items():
5121 if nr.fail_msg or not nr.payload:
5122 continue
5123 for (name, path, status, diagnose, variants,
5124 params, api_versions) in nr.payload:
5125 if name not in all_os:
5126 # build a list of nodes for this os containing empty lists
5127 # for each node in node_list
5128 all_os[name] = {}
5129 for nname in good_nodes:
5130 all_os[name][nname] = []
5131 # convert params from [name, help] to (name, help)
5132 params = [tuple(v) for v in params]
5133 all_os[name][node_name].append((path, status, diagnose,
5134 variants, params, api_versions))
5136 return all_os
5137 def _GetQueryData(self, lu):
5138 """Computes the list of nodes and their attributes.
5141 # Locking is not used
5142 assert not (compat.any(lu.glm.is_owned(level)
5143 for level in locking.LEVELS
5144 if level != locking.LEVEL_CLUSTER) or
5145 self.do_locking or self.use_locking)
5147 valid_nodes = [node.name
5148 for node in lu.cfg.GetAllNodesInfo().values()
5149 if not node.offline and node.vm_capable]
5150 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5151 cluster = lu.cfg.GetClusterInfo()
5153 data = {}
5155 for (os_name, os_data) in pol.items():
5156 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5157 hidden=(os_name in cluster.hidden_os),
5158 blacklisted=(os_name in cluster.blacklisted_os))
5160 variants = set()
5161 parameters = set()
5162 api_versions = set()
5164 for idx, osl in enumerate(os_data.values()):
5165 info.valid = bool(info.valid and osl and osl[0][1])
5166 if not info.valid:
5167 break
5169 (node_variants, node_params, node_api) = osl[0][3:6]
5170 if idx == 0:
5171 # First entry
5172 variants.update(node_variants)
5173 parameters.update(node_params)
5174 api_versions.update(node_api)
5175 else:
5176 # Filter out inconsistent values
5177 variants.intersection_update(node_variants)
5178 parameters.intersection_update(node_params)
5179 api_versions.intersection_update(node_api)
5181 info.variants = list(variants)
5182 info.parameters = list(parameters)
5183 info.api_versions = list(api_versions)
5185 data[os_name] = info
5187 # Prepare data in requested order
5188 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5189 if name in data]
5192 class LUOsDiagnose(NoHooksLU):
5193 """Logical unit for OS diagnose/query.
5199 def _BuildFilter(fields, names):
5200 """Builds a filter for querying OSes.
5203 name_filter = qlang.MakeSimpleFilter("name", names)
5205 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5206 # respective field is not requested
5207 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5208 for fname in ["hidden", "blacklisted"]
5209 if fname not in fields]
5210 if "valid" not in fields:
5211 status_filter.append([qlang.OP_TRUE, "valid"])
5213 if status_filter:
5214 status_filter.insert(0, qlang.OP_AND)
5215 else:
5216 status_filter = None
5218 if name_filter and status_filter:
5219 return [qlang.OP_AND, name_filter, status_filter]
5220 elif name_filter:
5221 return name_filter
5222 else:
5223 return status_filter
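# Hedged example (the exact shape of qlang.MakeSimpleFilter's output is
# assumed): for fields=["name"] and names=["debian-etch"], the combined
# filter built above is roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]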
5225 def CheckArguments(self):
5226 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5227 self.op.output_fields, False)
5229 def ExpandNames(self):
5230 self.oq.ExpandNames(self)
5232 def Exec(self, feedback_fn):
5233 return self.oq.OldStyleQuery(self)
5236 class _ExtStorageQuery(_QueryBase):
5237 FIELDS = query.EXTSTORAGE_FIELDS
5239 def ExpandNames(self, lu):
5240 # Lock all nodes in shared mode
5241 # Temporary removal of locks, should be reverted later
5242 # TODO: reintroduce locks when they are lighter-weight
5243 lu.needed_locks = {}
5244 #self.share_locks[locking.LEVEL_NODE] = 1
5245 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5247 # The following variables interact with _QueryBase._GetNames
5248 if self.names:
5249 self.wanted = self.names
5250 else:
5251 self.wanted = locking.ALL_SET
5253 self.do_locking = self.use_locking
5255 def DeclareLocks(self, lu, level):
5256 pass
5258 @staticmethod
5259 def _DiagnoseByProvider(rlist):
5260 """Remaps a per-node return list into an a per-provider per-node dictionary
5262 @param rlist: a map with node names as keys and ExtStorage objects as values
5265 @return: a dictionary with extstorage providers as keys and as
5266 value another map, with nodes as keys and tuples of
5267 (path, status, diagnose, parameters) as values, eg::
5269 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5270 "node2": [(/srv/..., False, "missing file")]
5271 "node3": [(/srv/..., True, "", [])]
5276 # we build here the list of nodes that didn't fail the RPC (at RPC
5277 # level), so that nodes with a non-responding node daemon don't
5278 # make all OSes invalid
5279 good_nodes = [node_name for node_name in rlist
5280 if not rlist[node_name].fail_msg]
5281 for node_name, nr in rlist.items():
5282 if nr.fail_msg or not nr.payload:
5283 continue
5284 for (name, path, status, diagnose, params) in nr.payload:
5285 if name not in all_es:
5286 # build a list of nodes for this os containing empty lists
5287 # for each node in node_list
5288 all_es[name] = {}
5289 for nname in good_nodes:
5290 all_es[name][nname] = []
5291 # convert params from [name, help] to (name, help)
5292 params = [tuple(v) for v in params]
5293 all_es[name][node_name].append((path, status, diagnose, params))
5295 return all_es
5296 def _GetQueryData(self, lu):
5297 """Computes the list of nodes and their attributes.
5300 # Locking is not used
5301 assert not (compat.any(lu.glm.is_owned(level)
5302 for level in locking.LEVELS
5303 if level != locking.LEVEL_CLUSTER) or
5304 self.do_locking or self.use_locking)
5306 valid_nodes = [node.name
5307 for node in lu.cfg.GetAllNodesInfo().values()
5308 if not node.offline and node.vm_capable]
5309 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5311 data = {}
5313 nodegroup_list = lu.cfg.GetNodeGroupList()
5315 for (es_name, es_data) in pol.items():
5316 # For every provider compute the nodegroup validity.
5317 # To do this we need to check the validity of each node in es_data
5318 # and then construct the corresponding nodegroup dict:
5319 # { nodegroup1: status
5320 # nodegroup2: status
5321 # }
5322 ndgrp_data = {}
5323 for nodegroup in nodegroup_list:
5324 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5326 nodegroup_nodes = ndgrp.members
5327 nodegroup_name = ndgrp.name
5328 node_statuses = []
5330 for node in nodegroup_nodes:
5331 if node in valid_nodes:
5332 if es_data[node] != []:
5333 node_status = es_data[node][0][1]
5334 node_statuses.append(node_status)
5335 else:
5336 node_statuses.append(False)
5338 if False in node_statuses:
5339 ndgrp_data[nodegroup_name] = False
5340 else:
5341 ndgrp_data[nodegroup_name] = True
5343 # Compute the provider's parameters
5344 parameters = set()
5345 for idx, esl in enumerate(es_data.values()):
5346 valid = bool(esl and esl[0][1])
5347 if not valid:
5348 break
5350 node_params = esl[0][3]
5351 if idx == 0:
5352 # First entry
5353 parameters.update(node_params)
5354 else:
5355 # Filter out inconsistent values
5356 parameters.intersection_update(node_params)
5358 params = list(parameters)
5360 # Now fill all the info for this provider
5361 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5362 nodegroup_status=ndgrp_data,
5363 parameters=params)
5365 data[es_name] = info
5367 # Prepare data in requested order
5368 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5369 if name in data]
5372 class LUExtStorageDiagnose(NoHooksLU):
5373 """Logical unit for ExtStorage diagnose/query.
5378 def CheckArguments(self):
5379 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5380 self.op.output_fields, False)
5382 def ExpandNames(self):
5383 self.eq.ExpandNames(self)
5385 def Exec(self, feedback_fn):
5386 return self.eq.OldStyleQuery(self)
5389 class LUNodeRemove(LogicalUnit):
5390 """Logical unit for removing a node.
5393 HPATH = "node-remove"
5394 HTYPE = constants.HTYPE_NODE
5396 def BuildHooksEnv(self):
5397 """Build hooks env.
5399 """
5400 return {
5401 "OP_TARGET": self.op.node_name,
5402 "NODE_NAME": self.op.node_name,
5403 }
5405 def BuildHooksNodes(self):
5406 """Build hooks nodes.
5408 This doesn't run on the target node in the pre phase as a failed
5409 node would then be impossible to remove.
5411 """
5412 all_nodes = self.cfg.GetNodeList()
5413 try:
5414 all_nodes.remove(self.op.node_name)
5415 except ValueError:
5416 pass
5417 return (all_nodes, all_nodes)
5419 def CheckPrereq(self):
5420 """Check prerequisites.
5423 - the node exists in the configuration
5424 - it does not have primary or secondary instances
5425 - it's not the master
5427 Any errors are signaled by raising errors.OpPrereqError.
5430 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5431 node = self.cfg.GetNodeInfo(self.op.node_name)
5432 assert node is not None
5434 masternode = self.cfg.GetMasterNode()
5435 if node.name == masternode:
5436 raise errors.OpPrereqError("Node is the master node, failover to another"
5437 " node is required", errors.ECODE_INVAL)
5439 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5440 if node.name in instance.all_nodes:
5441 raise errors.OpPrereqError("Instance %s is still running on the node,"
5442 " please remove first" % instance_name,
5444 self.op.node_name = node.name
5447 def Exec(self, feedback_fn):
5448 """Removes the node from the cluster.
5452 logging.info("Stopping the node daemon and removing configs from node %s",
5455 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5457 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5458 "Not owning BGL"
5460 # Promote nodes to master candidate as needed
5461 _AdjustCandidatePool(self, exceptions=[node.name])
5462 self.context.RemoveNode(node.name)
5464 # Run post hooks on the node before it's removed
5465 _RunPostHook(self, node.name)
5467 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5468 msg = result.fail_msg
5470 self.LogWarning("Errors encountered on the remote node while leaving"
5471 " the cluster: %s", msg)
5473 # Remove node from our /etc/hosts
5474 if self.cfg.GetClusterInfo().modify_etc_hosts:
5475 master_node = self.cfg.GetMasterNode()
5476 result = self.rpc.call_etc_hosts_modify(master_node,
5477 constants.ETC_HOSTS_REMOVE,
5478 node.name, None)
5479 result.Raise("Can't update hosts file with new host data")
5480 _RedistributeAncillaryFiles(self)
5483 class _NodeQuery(_QueryBase):
5484 FIELDS = query.NODE_FIELDS
5486 def ExpandNames(self, lu):
5487 lu.needed_locks = {}
5488 lu.share_locks = _ShareAll()
5490 if self.names:
5491 self.wanted = _GetWantedNodes(lu, self.names)
5492 else:
5493 self.wanted = locking.ALL_SET
5495 self.do_locking = (self.use_locking and
5496 query.NQ_LIVE in self.requested_data)
5498 if self.do_locking:
5499 # If any non-static field is requested we need to lock the nodes
5500 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5501 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5503 def DeclareLocks(self, lu, level):
5504 pass
5506 def _GetQueryData(self, lu):
5507 """Computes the list of nodes and their attributes.
5510 all_info = lu.cfg.GetAllNodesInfo()
5512 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5514 # Gather data as requested
5515 if query.NQ_LIVE in self.requested_data:
5516 # filter out non-vm_capable nodes
5517 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5519 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5520 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5521 [lu.cfg.GetHypervisorType()], es_flags)
5522 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5523 for (name, nresult) in node_data.items()
5524 if not nresult.fail_msg and nresult.payload)
5525 else:
5526 live_data = None
5528 if query.NQ_INST in self.requested_data:
5529 node_to_primary = dict([(name, set()) for name in nodenames])
5530 node_to_secondary = dict([(name, set()) for name in nodenames])
5532 inst_data = lu.cfg.GetAllInstancesInfo()
5534 for inst in inst_data.values():
5535 if inst.primary_node in node_to_primary:
5536 node_to_primary[inst.primary_node].add(inst.name)
5537 for secnode in inst.secondary_nodes:
5538 if secnode in node_to_secondary:
5539 node_to_secondary[secnode].add(inst.name)
5540 else:
5541 node_to_primary = None
5542 node_to_secondary = None
5544 if query.NQ_OOB in self.requested_data:
5545 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5546 for name, node in all_info.iteritems())
5547 else:
5548 oob_support = None
5550 if query.NQ_GROUP in self.requested_data:
5551 groups = lu.cfg.GetAllNodeGroupsInfo()
5552 else:
5553 groups = {}
5555 return query.NodeQueryData([all_info[name] for name in nodenames],
5556 live_data, lu.cfg.GetMasterNode(),
5557 node_to_primary, node_to_secondary, groups,
5558 oob_support, lu.cfg.GetClusterInfo())
5561 class LUNodeQuery(NoHooksLU):
5562 """Logical unit for querying nodes.
5565 # pylint: disable=W0142
5568 def CheckArguments(self):
5569 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5570 self.op.output_fields, self.op.use_locking)
5572 def ExpandNames(self):
5573 self.nq.ExpandNames(self)
5575 def DeclareLocks(self, level):
5576 self.nq.DeclareLocks(self, level)
5578 def Exec(self, feedback_fn):
5579 return self.nq.OldStyleQuery(self)
5582 class LUNodeQueryvols(NoHooksLU):
5583 """Logical unit for getting volumes on node(s).
5587 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5588 _FIELDS_STATIC = utils.FieldSet("node")
5590 def CheckArguments(self):
5591 _CheckOutputFields(static=self._FIELDS_STATIC,
5592 dynamic=self._FIELDS_DYNAMIC,
5593 selected=self.op.output_fields)
5595 def ExpandNames(self):
5596 self.share_locks = _ShareAll()
5598 if self.op.nodes:
5599 self.needed_locks = {
5600 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5601 }
5602 else:
5603 self.needed_locks = {
5604 locking.LEVEL_NODE: locking.ALL_SET,
5605 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5606 }
5608 def Exec(self, feedback_fn):
5609 """Computes the list of nodes and their attributes.
5612 nodenames = self.owned_locks(locking.LEVEL_NODE)
5613 volumes = self.rpc.call_node_volumes(nodenames)
5615 ilist = self.cfg.GetAllInstancesInfo()
5616 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5619 for node in nodenames:
5620 nresult = volumes[node]
5623 msg = nresult.fail_msg
5625 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5628 node_vols = sorted(nresult.payload,
5629 key=operator.itemgetter("dev"))
5631 for vol in node_vols:
5633 for field in self.op.output_fields:
5636 elif field == "phys":
5640 elif field == "name":
5642 elif field == "size":
5643 val = int(float(vol["size"]))
5644 elif field == "instance":
5645 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5647 raise errors.ParameterError(field)
5648 node_output.append(str(val))
5650 output.append(node_output)
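# Illustrative addition: the vol2inst lookup above keys logical volumes by
# (node, "vg/name") pairs. A small self-contained sketch of that mapping,
# with made-up data in a simplified (name, node, [lv_names]) shape:
def _ExampleMapDisksToNodes(instances):
  """Map (node, lv_name) to the owning instance name."""
  return dict(((node, lv), name)
              for (name, node, lvs) in instances
              for lv in lvs)

_EXAMPLE_MAP = _ExampleMapDisksToNodes([
  ("inst1", "node1", ["xenvg/disk0", "xenvg/disk1"]),
])
assert _EXAMPLE_MAP[("node1", "xenvg/disk0")] == "inst1"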
5655 class LUNodeQueryStorage(NoHooksLU):
5656 """Logical unit for getting information on storage units on node(s).
5659 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5662 def CheckArguments(self):
5663 _CheckOutputFields(static=self._FIELDS_STATIC,
5664 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5665 selected=self.op.output_fields)
5667 def ExpandNames(self):
5668 self.share_locks = _ShareAll()
5670 if self.op.nodes:
5671 self.needed_locks = {
5672 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5673 }
5674 else:
5675 self.needed_locks = {
5676 locking.LEVEL_NODE: locking.ALL_SET,
5677 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5678 }
5680 def Exec(self, feedback_fn):
5681 """Computes the list of nodes and their attributes.
5684 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5686 # Always get name to sort by
5687 if constants.SF_NAME in self.op.output_fields:
5688 fields = self.op.output_fields[:]
5690 fields = [constants.SF_NAME] + self.op.output_fields
5692 # Never ask for node or type as it's only known to the LU
5693 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5694 while extra in fields:
5695 fields.remove(extra)
5697 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5698 name_idx = field_idx[constants.SF_NAME]
5700 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5701 data = self.rpc.call_storage_list(self.nodes,
5702 self.op.storage_type, st_args,
5703 self.op.name, fields)
5705 result = []
5707 for node in utils.NiceSort(self.nodes):
5708 nresult = data[node]
5709 if nresult.offline:
5710 continue
5712 msg = nresult.fail_msg
5713 if msg:
5714 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5715 continue
5717 rows = dict([(row[name_idx], row) for row in nresult.payload])
5719 for name in utils.NiceSort(rows.keys()):
5720 row = rows[name]
5722 out = []
5724 for field in self.op.output_fields:
5725 if field == constants.SF_NODE:
5726 val = node
5727 elif field == constants.SF_TYPE:
5728 val = self.op.storage_type
5729 elif field in field_idx:
5730 val = row[field_idx[field]]
5731 else:
5732 raise errors.ParameterError(field)
5734 out.append(val)
5736 result.append(out)
5738 return result
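# Illustrative addition: the storage query sends an explicit field list and
# then addresses the returned rows positionally via field_idx. A minimal
# sketch of that positional lookup (the field names here are hypothetical):
def _ExampleFieldIndex(fields):
  return dict((name, idx) for (idx, name) in enumerate(fields))

_FIELDS = ["name", "size", "used"]
_IDX = _ExampleFieldIndex(_FIELDS)
_ROW = ["lv0", 1024, 512]
assert _ROW[_IDX["size"]] == 1024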
5741 class _InstanceQuery(_QueryBase):
5742 FIELDS = query.INSTANCE_FIELDS
5744 def ExpandNames(self, lu):
5745 lu.needed_locks = {}
5746 lu.share_locks = _ShareAll()
5748 if self.names:
5749 self.wanted = _GetWantedInstances(lu, self.names)
5750 else:
5751 self.wanted = locking.ALL_SET
5753 self.do_locking = (self.use_locking and
5754 query.IQ_LIVE in self.requested_data)
5755 if self.do_locking:
5756 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5757 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5758 lu.needed_locks[locking.LEVEL_NODE] = []
5759 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5761 self.do_grouplocks = (self.do_locking and
5762 query.IQ_NODES in self.requested_data)
5764 def DeclareLocks(self, lu, level):
5766 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5767 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5769 # Lock all groups used by instances optimistically; this requires going
5770 # via the node before it's locked, requiring verification later on
5771 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5772 set(group_uuid
5773 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5774 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5775 elif level == locking.LEVEL_NODE:
5776 lu._LockInstancesNodes() # pylint: disable=W0212
5778 @staticmethod
5779 def _CheckGroupLocks(lu):
5780 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5781 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5783 # Check if node groups for locked instances are still correct
5784 for instance_name in owned_instances:
5785 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
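# Illustrative addition: the group locks above are computed from the
# configuration *before* they are acquired (optimistic locking), so after
# acquisition the set must be re-checked and the operation retried on a
# mismatch. A self-contained sketch of that verify step (names made up):
def _ExampleVerifyOptimisticLocks(owned_groups, current_groups):
  """Raise if the config changed between computing and acquiring locks."""
  missing = set(current_groups) - set(owned_groups)
  if missing:
    raise RuntimeError("Groups changed since locks were acquired: %s" %
                       ", ".join(sorted(missing)))

_ExampleVerifyOptimisticLocks(["g1", "g2"], ["g1"])  # ok, owned is a superset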
5787 def _GetQueryData(self, lu):
5788 """Computes the list of instances and their attributes.
5791 if self.do_grouplocks:
5792 self._CheckGroupLocks(lu)
5794 cluster = lu.cfg.GetClusterInfo()
5795 all_info = lu.cfg.GetAllInstancesInfo()
5797 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5799 instance_list = [all_info[name] for name in instance_names]
5800 nodes = frozenset(itertools.chain(*(inst.all_nodes
5801 for inst in instance_list)))
5802 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5803 bad_nodes = []
5804 offline_nodes = []
5805 wrongnode_inst = set()
5807 # Gather data as requested
5808 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5809 live_data = {}
5810 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5811 for name in nodes:
5812 result = node_data[name]
5813 if result.offline:
5814 # offline nodes will be in both lists
5815 assert result.fail_msg
5816 offline_nodes.append(name)
5817 if result.fail_msg:
5818 bad_nodes.append(name)
5819 elif result.payload:
5820 for inst in result.payload:
5821 if inst in all_info:
5822 if all_info[inst].primary_node == name:
5823 live_data.update(result.payload)
5824 else:
5825 wrongnode_inst.add(inst)
5826 else:
5827 # orphan instance; we don't list it here as we don't
5828 # handle this case yet in the output of instance listing
5829 logging.warning("Orphan instance '%s' found on node %s",
5830 inst, name)
5831 # else no instance is alive
5832 else:
5833 live_data = None
5835 if query.IQ_DISKUSAGE in self.requested_data:
5836 gmi = ganeti.masterd.instance
5837 disk_usage = dict((inst.name,
5838 gmi.ComputeDiskSize(inst.disk_template,
5839 [{constants.IDISK_SIZE: disk.size}
5840 for disk in inst.disks]))
5841 for inst in instance_list)
5842 else:
5843 disk_usage = None
5845 if query.IQ_CONSOLE in self.requested_data:
5846 consinfo = {}
5847 for inst in instance_list:
5848 if inst.name in live_data:
5849 # Instance is running
5850 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5851 else:
5852 consinfo[inst.name] = None
5853 assert set(consinfo.keys()) == set(instance_names)
5854 else:
5855 consinfo = None
5857 if query.IQ_NODES in self.requested_data:
5858 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5859 instance_list)))
5860 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5861 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5862 for uuid in set(map(operator.attrgetter("group"),
5863 nodes.values())))
5864 else:
5865 nodes = None
5866 groups = None
5868 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5869 disk_usage, offline_nodes, bad_nodes,
5870 live_data, wrongnode_inst, consinfo,
5871 nodes, groups)
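# Illustrative addition: the loop above sorts each node's RPC answer into
# three buckets: offline nodes, nodes whose call failed, and live instance
# data. A compact sketch of that triage with a hypothetical result type:
import collections

_NodeReply = collections.namedtuple("_NodeReply",
                                    ["offline", "fail_msg", "payload"])

def _ExampleTriage(node_data):
  offline, bad, live = [], [], {}
  for name, res in sorted(node_data.items()):
    if res.offline:
      offline.append(name)  # offline nodes also count as failed below
    if res.fail_msg:
      bad.append(name)
    elif res.payload:
      live.update(res.payload)
  return offline, bad, live

assert _ExampleTriage({
  "n1": _NodeReply(False, None, {"inst1": {"state": "running"}}),
  "n2": _NodeReply(True, "node down", None),
}) == (["n2"], ["n2"], {"inst1": {"state": "running"}})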
5874 class LUQuery(NoHooksLU):
5875 """Query for resources/items of a certain kind.
5878 # pylint: disable=W0142
5881 def CheckArguments(self):
5882 qcls = _GetQueryImplementation(self.op.what)
5884 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5886 def ExpandNames(self):
5887 self.impl.ExpandNames(self)
5889 def DeclareLocks(self, level):
5890 self.impl.DeclareLocks(self, level)
5892 def Exec(self, feedback_fn):
5893 return self.impl.NewStyleQuery(self)
5896 class LUQueryFields(NoHooksLU):
5897 """Query for resources/items of a certain kind.
5900 # pylint: disable=W0142
5903 def CheckArguments(self):
5904 self.qcls = _GetQueryImplementation(self.op.what)
5906 def ExpandNames(self):
5907 self.needed_locks = {}
5909 def Exec(self, feedback_fn):
5910 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5913 class LUNodeModifyStorage(NoHooksLU):
5914 """Logical unit for modifying a storage volume on a node.
5919 def CheckArguments(self):
5920 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5922 storage_type = self.op.storage_type
5924 try:
5925 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5926 except KeyError:
5927 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5928 " modified" % storage_type,
5929 errors.ECODE_INVAL)
5931 diff = set(self.op.changes.keys()) - modifiable
5932 if diff:
5933 raise errors.OpPrereqError("The following fields can not be modified for"
5934 " storage units of type '%s': %r" %
5935 (storage_type, list(diff)),
5936 errors.ECODE_INVAL)
5938 def ExpandNames(self):
5939 self.needed_locks = {
5940 locking.LEVEL_NODE: self.op.node_name,
5941 }
5943 def Exec(self, feedback_fn):
5944 """Computes the list of nodes and their attributes.
5947 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5948 result = self.rpc.call_storage_modify(self.op.node_name,
5949 self.op.storage_type, st_args,
5950 self.op.name, self.op.changes)
5951 result.Raise("Failed to modify storage unit '%s' on %s" %
5952 (self.op.name, self.op.node_name))
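# Illustrative addition: LUNodeModifyStorage validates the requested changes
# against a whitelist of modifiable fields before issuing the RPC. The same
# set-difference pattern in isolation (the field name below is made up):
def _ExampleCheckModifiable(changes, modifiable):
  diff = set(changes) - set(modifiable)
  if diff:
    raise ValueError("Fields %r cannot be modified" % sorted(diff))

_ExampleCheckModifiable({"allocatable": True}, frozenset(["allocatable"]))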
5955 class LUNodeAdd(LogicalUnit):
5956 """Logical unit for adding node to the cluster.
5960 HTYPE = constants.HTYPE_NODE
5961 _NFLAGS = ["master_capable", "vm_capable"]
5963 def CheckArguments(self):
5964 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5965 # validate/normalize the node name
5966 self.hostname = netutils.GetHostname(name=self.op.node_name,
5967 family=self.primary_ip_family)
5968 self.op.node_name = self.hostname.name
5970 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5971 raise errors.OpPrereqError("Cannot readd the master node",
5972 errors.ECODE_STATE)
5974 if self.op.readd and self.op.group:
5975 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5976 " being readded", errors.ECODE_INVAL)
5978 def BuildHooksEnv(self):
5979 """Build hooks env.
5981 This will run on all nodes before, and on all nodes + the new node after.
5983 """
5984 return {
5985 "OP_TARGET": self.op.node_name,
5986 "NODE_NAME": self.op.node_name,
5987 "NODE_PIP": self.op.primary_ip,
5988 "NODE_SIP": self.op.secondary_ip,
5989 "MASTER_CAPABLE": str(self.op.master_capable),
5990 "VM_CAPABLE": str(self.op.vm_capable),
5993 def BuildHooksNodes(self):
5994 """Build hooks nodes.
5997 # Exclude added node
5998 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5999 post_nodes = pre_nodes + [self.op.node_name, ]
6001 return (pre_nodes, post_nodes)
6003 def CheckPrereq(self):
6004 """Check prerequisites.
6007 - the new node is not already in the config
6009 - its parameters (single/dual homed) matches the cluster
6011 Any errors are signaled by raising errors.OpPrereqError.
6015 hostname = self.hostname
6016 node = hostname.name
6017 primary_ip = self.op.primary_ip = hostname.ip
6018 if self.op.secondary_ip is None:
6019 if self.primary_ip_family == netutils.IP6Address.family:
6020 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6021 " IPv4 address must be given as secondary",
6023 self.op.secondary_ip = primary_ip
6025 secondary_ip = self.op.secondary_ip
6026 if not netutils.IP4Address.IsValid(secondary_ip):
6027 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6028 " address" % secondary_ip, errors.ECODE_INVAL)
6030 node_list = cfg.GetNodeList()
6031 if not self.op.readd and node in node_list:
6032 raise errors.OpPrereqError("Node %s is already in the configuration" %
6033 node, errors.ECODE_EXISTS)
6034 elif self.op.readd and node not in node_list:
6035 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6036 errors.ECODE_NOENT)
6038 self.changed_primary_ip = False
6040 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6041 if self.op.readd and node == existing_node_name:
6042 if existing_node.secondary_ip != secondary_ip:
6043 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6044 " address configuration as before",
6046 if existing_node.primary_ip != primary_ip:
6047 self.changed_primary_ip = True
6051 if (existing_node.primary_ip == primary_ip or
6052 existing_node.secondary_ip == primary_ip or
6053 existing_node.primary_ip == secondary_ip or
6054 existing_node.secondary_ip == secondary_ip):
6055 raise errors.OpPrereqError("New node ip address(es) conflict with"
6056 " existing node %s" % existing_node.name,
6057 errors.ECODE_NOTUNIQUE)
6059 # After this 'if' block, None is no longer a valid value for the
6060 # _capable op attributes
6061 if self.op.readd:
6062 old_node = self.cfg.GetNodeInfo(node)
6063 assert old_node is not None, "Can't retrieve locked node %s" % node
6064 for attr in self._NFLAGS:
6065 if getattr(self.op, attr) is None:
6066 setattr(self.op, attr, getattr(old_node, attr))
6067 else:
6068 for attr in self._NFLAGS:
6069 if getattr(self.op, attr) is None:
6070 setattr(self.op, attr, True)
6072 if self.op.readd and not self.op.vm_capable:
6073 pri, sec = cfg.GetNodeInstances(node)
6074 if pri or sec:
6075 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6076 " flag set to false, but it already holds"
6077 " instances" % node,
6078 errors.ECODE_STATE)
6080 # check that the type of the node (single versus dual homed) is the
6081 # same as for the master
6082 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6083 master_singlehomed = myself.secondary_ip == myself.primary_ip
6084 newbie_singlehomed = secondary_ip == primary_ip
6085 if master_singlehomed != newbie_singlehomed:
6086 if master_singlehomed:
6087 raise errors.OpPrereqError("The master has no secondary ip but the"
6088 " new node has one",
6091 raise errors.OpPrereqError("The master has a secondary ip but the"
6092 " new node doesn't have one",
6095 # checks reachability
6096 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6097 raise errors.OpPrereqError("Node not reachable by ping",
6098 errors.ECODE_ENVIRON)
6100 if not newbie_singlehomed:
6101 # check reachability from my secondary ip to newbie's secondary ip
6102 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6103 source=myself.secondary_ip):
6104 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6105 " based ping to node daemon port",
6106 errors.ECODE_ENVIRON)
6108 if self.op.readd:
6109 exceptions = [node]
6110 else:
6111 exceptions = []
6113 if self.op.master_capable:
6114 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6115 else:
6116 self.master_candidate = False
6118 if self.op.readd:
6119 self.new_node = old_node
6120 else:
6121 node_group = cfg.LookupNodeGroup(self.op.group)
6122 self.new_node = objects.Node(name=node,
6123 primary_ip=primary_ip,
6124 secondary_ip=secondary_ip,
6125 master_candidate=self.master_candidate,
6126 offline=False, drained=False,
6127 group=node_group, ndparams={})
6129 if self.op.ndparams:
6130 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6131 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6132 "node", "cluster or group")
6134 if self.op.hv_state:
6135 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6137 if self.op.disk_state:
6138 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6140 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6141 # it a property on the base class.
6142 rpcrunner = rpc.DnsOnlyRunner()
6143 result = rpcrunner.call_version([node])[node]
6144 result.Raise("Can't get version information from node %s" % node)
6145 if constants.PROTOCOL_VERSION == result.payload:
6146 logging.info("Communication to node %s fine, sw version %s match",
6147 node, result.payload)
6148 else:
6149 raise errors.OpPrereqError("Version mismatch master version %s,"
6150 " node version %s" %
6151 (constants.PROTOCOL_VERSION, result.payload),
6152 errors.ECODE_ENVIRON)
6154 vg_name = cfg.GetVGName()
6155 if vg_name is not None:
6156 vparams = {constants.NV_PVLIST: [vg_name]}
6157 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6158 cname = self.cfg.GetClusterName()
6159 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6160 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6161 if errmsgs:
6162 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6163 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6165 def Exec(self, feedback_fn):
6166 """Adds the new node to the cluster.
6169 new_node = self.new_node
6170 node = new_node.name
6172 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6173 "Not owning BGL"
6175 # We are adding a new node, so we assume it is powered
6176 new_node.powered = True
6178 # for re-adds, reset the offline/drained/master-candidate flags;
6179 # we need to reset here, otherwise offline would prevent RPC calls
6180 # later in the procedure; this also means that if the re-add
6181 # fails, we are left with a non-offlined, broken node
6182 if self.op.readd:
6183 new_node.drained = new_node.offline = False # pylint: disable=W0201
6184 self.LogInfo("Readding a node, the offline/drained flags were reset")
6185 # if we demote the node, we do cleanup later in the procedure
6186 new_node.master_candidate = self.master_candidate
6187 if self.changed_primary_ip:
6188 new_node.primary_ip = self.op.primary_ip
6190 # copy the master/vm_capable flags
6191 for attr in self._NFLAGS:
6192 setattr(new_node, attr, getattr(self.op, attr))
6194 # notify the user about any possible mc promotion
6195 if new_node.master_candidate:
6196 self.LogInfo("Node will be a master candidate")
6198 if self.op.ndparams:
6199 new_node.ndparams = self.op.ndparams
6201 new_node.ndparams = {}
6203 if self.op.hv_state:
6204 new_node.hv_state_static = self.new_hv_state
6206 if self.op.disk_state:
6207 new_node.disk_state_static = self.new_disk_state
6209 # Add node to our /etc/hosts, and add key to known_hosts
6210 if self.cfg.GetClusterInfo().modify_etc_hosts:
6211 master_node = self.cfg.GetMasterNode()
6212 result = self.rpc.call_etc_hosts_modify(master_node,
6213 constants.ETC_HOSTS_ADD,
6214 self.hostname.name,
6215 self.hostname.ip)
6216 result.Raise("Can't update hosts file with new host data")
6218 if new_node.secondary_ip != new_node.primary_ip:
6219 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6220 False)
6222 node_verify_list = [self.cfg.GetMasterNode()]
6223 node_verify_param = {
6224 constants.NV_NODELIST: ([node], {}),
6225 # TODO: do a node-net-test as well?
6226 }
6228 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6229 self.cfg.GetClusterName())
6230 for verifier in node_verify_list:
6231 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6232 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6233 if nl_payload:
6234 for failed in nl_payload:
6235 feedback_fn("ssh/hostname verification failed"
6236 " (checking from %s): %s" %
6237 (verifier, nl_payload[failed]))
6238 raise errors.OpExecError("ssh/hostname verification failed")
6240 if self.op.readd:
6241 _RedistributeAncillaryFiles(self)
6242 self.context.ReaddNode(new_node)
6243 # make sure we redistribute the config
6244 self.cfg.Update(new_node, feedback_fn)
6245 # and make sure the new node will not have old files around
6246 if not new_node.master_candidate:
6247 result = self.rpc.call_node_demote_from_mc(new_node.name)
6248 msg = result.fail_msg
6249 if msg:
6250 self.LogWarning("Node failed to demote itself from master"
6251 " candidate status: %s" % msg)
6252 else:
6253 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6254 additional_vm=self.op.vm_capable)
6255 self.context.AddNode(new_node, self.proc.GetECId())
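# Illustrative addition: the node-add flow above treats a non-empty
# NV_NODELIST payload as per-target failure messages and aborts. A sketch
# of that check, with the payload shape simplified to a plain dict:
def _ExampleVerifyPayload(nl_payload):
  if nl_payload:
    lines = ["%s: %s" % (tgt, err) for tgt, err in sorted(nl_payload.items())]
    raise RuntimeError("ssh/hostname verification failed: " +
                       "; ".join(lines))

_ExampleVerifyPayload({})  # an empty payload means verification passed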
6258 class LUNodeSetParams(LogicalUnit):
6259 """Modifies the parameters of a node.
6261 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6262 to the node role (as _ROLE_*)
6263 @cvar _R2F: a dictionary from node role to tuples of flags
6264 @cvar _FLAGS: a list of attribute names corresponding to the flags
6266 """
6267 HPATH = "node-modify"
6268 HTYPE = constants.HTYPE_NODE
6270 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6271 _F2R = {
6272 (True, False, False): _ROLE_CANDIDATE,
6273 (False, True, False): _ROLE_DRAINED,
6274 (False, False, True): _ROLE_OFFLINE,
6275 (False, False, False): _ROLE_REGULAR,
6276 }
6277 _R2F = dict((v, k) for k, v in _F2R.items())
6278 _FLAGS = ["master_candidate", "drained", "offline"]
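  # Illustrative sanity sketch (not part of the original LU): _F2R maps the
  # (master_candidate, drained, offline) flag tuple to a role, and _R2F is
  # its exact inverse, e.g. for the drained role:
  assert _F2R[(False, True, False)] == _ROLE_DRAINED
  assert _R2F[_ROLE_DRAINED] == (False, True, False)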
6280 def CheckArguments(self):
6281 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6282 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6283 self.op.master_capable, self.op.vm_capable,
6284 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6285 self.op.disk_state]
6286 if all_mods.count(None) == len(all_mods):
6287 raise errors.OpPrereqError("Please pass at least one modification",
6288 errors.ECODE_INVAL)
6289 if all_mods.count(True) > 1:
6290 raise errors.OpPrereqError("Can't set the node into more than one"
6291 " state at the same time",
6292 errors.ECODE_INVAL)
6294 # Boolean value that tells us whether we might be demoting from MC
6295 self.might_demote = (self.op.master_candidate is False or
6296 self.op.offline is True or
6297 self.op.drained is True or
6298 self.op.master_capable is False)
6300 if self.op.secondary_ip:
6301 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6302 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6303 " address" % self.op.secondary_ip,
6306 self.lock_all = self.op.auto_promote and self.might_demote
6307 self.lock_instances = self.op.secondary_ip is not None
6309 def _InstanceFilter(self, instance):
6310 """Filter for getting affected instances.
6313 return (instance.disk_template in constants.DTS_INT_MIRROR and
6314 self.op.node_name in instance.all_nodes)
6316 def ExpandNames(self):
6317 if self.lock_all:
6318 self.needed_locks = {
6319 locking.LEVEL_NODE: locking.ALL_SET,
6321 # Block allocations when all nodes are locked
6322 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6323 }
6324 else:
6325 self.needed_locks = {
6326 locking.LEVEL_NODE: self.op.node_name,
6327 }
6329 # Since modifying a node can have severe effects on currently running
6330 # operations the resource lock is at least acquired in shared mode
6331 self.needed_locks[locking.LEVEL_NODE_RES] = \
6332 self.needed_locks[locking.LEVEL_NODE]
6334 # Get all locks except nodes in shared mode; they are not used for anything
6335 # but read-only access
6336 self.share_locks = _ShareAll()
6337 self.share_locks[locking.LEVEL_NODE] = 0
6338 self.share_locks[locking.LEVEL_NODE_RES] = 0
6339 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6341 if self.lock_instances:
6342 self.needed_locks[locking.LEVEL_INSTANCE] = \
6343 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6345 def BuildHooksEnv(self):
6346 """Build hooks env.
6348 This runs on the master node.
6350 """
6351 return {
6352 "OP_TARGET": self.op.node_name,
6353 "MASTER_CANDIDATE": str(self.op.master_candidate),
6354 "OFFLINE": str(self.op.offline),
6355 "DRAINED": str(self.op.drained),
6356 "MASTER_CAPABLE": str(self.op.master_capable),
6357 "VM_CAPABLE": str(self.op.vm_capable),
6360 def BuildHooksNodes(self):
6361 """Build hooks nodes.
6364 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6367 def CheckPrereq(self):
6368 """Check prerequisites.
6370 This only checks the instance list against the existing names.
6372 """
6373 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6375 if self.lock_instances:
6376 affected_instances = \
6377 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6379 # Verify instance locks
6380 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6381 wanted_instances = frozenset(affected_instances.keys())
6382 if wanted_instances - owned_instances:
6383 raise errors.OpPrereqError("Instances affected by changing node %s's"
6384 " secondary IP address have changed since"
6385 " locks were acquired, wanted '%s', have"
6386 " '%s'; retry the operation" %
6388 utils.CommaJoin(wanted_instances),
6389 utils.CommaJoin(owned_instances)),
6392 affected_instances = None
6394 if (self.op.master_candidate is not None or
6395 self.op.drained is not None or
6396 self.op.offline is not None):
6397 # we can't change the master's node flags
6398 if self.op.node_name == self.cfg.GetMasterNode():
6399 raise errors.OpPrereqError("The master role can be changed"
6400 " only via master-failover",
6401 errors.ECODE_INVAL)
6403 if self.op.master_candidate and not node.master_capable:
6404 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6405 " it a master candidate" % node.name,
6406 errors.ECODE_STATE)
6408 if self.op.vm_capable is False:
6409 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6410 if ipri or isec:
6411 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6412 " the vm_capable flag" % node.name,
6413 errors.ECODE_STATE)
6415 if node.master_candidate and self.might_demote and not self.lock_all:
6416 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6417 # check if after removing the current node, we're missing master
6418 # candidates
6419 (mc_remaining, mc_should, _) = \
6420 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6421 if mc_remaining < mc_should:
6422 raise errors.OpPrereqError("Not enough master candidates, please"
6423 " pass auto promote option to allow"
6424 " promotion (--auto-promote or RAPI"
6425 " auto_promote=True)", errors.ECODE_STATE)
6427 self.old_flags = old_flags = (node.master_candidate,
6428 node.drained, node.offline)
6429 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6430 self.old_role = old_role = self._F2R[old_flags]
6432 # Check for ineffective changes
6433 for attr in self._FLAGS:
6434 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6435 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6436 setattr(self.op, attr, None)
6438 # Past this point, any flag change to False means a transition
6439 # away from the respective state, as only real changes are kept
6441 # TODO: We might query the real power state if it supports OOB
6442 if _SupportsOob(self.cfg, node):
6443 if self.op.offline is False and not (node.powered or
6444 self.op.powered is True):
6445 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6446 " offline status can be reset") %
6447 self.op.node_name, errors.ECODE_STATE)
6448 elif self.op.powered is not None:
6449 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6450 " as it does not support out-of-band"
6451 " handling") % self.op.node_name,
6454 # If we're being deofflined/drained, we'll MC ourself if needed
6455 if (self.op.drained is False or self.op.offline is False or
6456 (self.op.master_capable and not node.master_capable)):
6457 if _DecideSelfPromotion(self):
6458 self.op.master_candidate = True
6459 self.LogInfo("Auto-promoting node to master candidate")
6461 # If we're no longer master capable, we'll demote ourselves from MC
6462 if self.op.master_capable is False and node.master_candidate:
6463 self.LogInfo("Demoting from master candidate")
6464 self.op.master_candidate = False
6467 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6468 if self.op.master_candidate:
6469 new_role = self._ROLE_CANDIDATE
6470 elif self.op.drained:
6471 new_role = self._ROLE_DRAINED
6472 elif self.op.offline:
6473 new_role = self._ROLE_OFFLINE
6474 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6475 # False is still in new flags, which means we're un-setting (the
6476 # only) True flag
6477 new_role = self._ROLE_REGULAR
6478 else: # no new flags, nothing, keep old role
6479 new_role = old_role
6481 self.new_role = new_role
6483 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6484 # Trying to transition out of offline status
6485 result = self.rpc.call_version([node.name])[node.name]
6486 if result.fail_msg:
6487 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6488 " to report its version: %s" %
6489 (node.name, result.fail_msg),
6490 errors.ECODE_STATE)
6491 else:
6492 self.LogWarning("Transitioning node from offline to online state"
6493 " without using re-add. Please make sure the node"
6494 " is healthy!")
6496 # When changing the secondary ip, verify if this is a single-homed to
6497 # multi-homed transition or vice versa, and apply the relevant
6498 # cleanup.
6499 if self.op.secondary_ip:
6500 # Ok even without locking, because this can't be changed by any LU
6501 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6502 master_singlehomed = master.secondary_ip == master.primary_ip
6503 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6504 if self.op.force and node.name == master.name:
6505 self.LogWarning("Transitioning from single-homed to multi-homed"
6506 " cluster; all nodes will require a secondary IP"
6509 raise errors.OpPrereqError("Changing the secondary ip on a"
6510 " single-homed cluster requires the"
6511 " --force option to be passed, and the"
6512 " target node to be the master",
6514 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6515 if self.op.force and node.name == master.name:
6516 self.LogWarning("Transitioning from multi-homed to single-homed"
6517 " cluster; secondary IP addresses will have to be"
6520 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6521 " same as the primary IP on a multi-homed"
6522 " cluster, unless the --force option is"
6523 " passed, and the target node is the"
6524 " master", errors.ECODE_INVAL)
6526 assert not (frozenset(affected_instances) -
6527 self.owned_locks(locking.LEVEL_INSTANCE))
6529 if node.offline:
6530 if affected_instances:
6531 msg = ("Cannot change secondary IP address: offline node has"
6532 " instances (%s) configured to use it" %
6533 utils.CommaJoin(affected_instances.keys()))
6534 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6535 else:
6536 # On online nodes, check that no instances are running, and that
6537 # the node has the new ip and we can reach it.
6538 for instance in affected_instances.values():
6539 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6540 msg="cannot change secondary ip")
6542 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6543 if master.name != node.name:
6544 # check reachability from master secondary ip to new secondary ip
6545 if not netutils.TcpPing(self.op.secondary_ip,
6546 constants.DEFAULT_NODED_PORT,
6547 source=master.secondary_ip):
6548 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6549 " based ping to node daemon port",
6550 errors.ECODE_ENVIRON)
6552 if self.op.ndparams:
6553 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6554 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6555 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6556 "node", "cluster or group")
6557 self.new_ndparams = new_ndparams
6559 if self.op.hv_state:
6560 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6561 self.node.hv_state_static)
6563 if self.op.disk_state:
6564 self.new_disk_state = \
6565 _MergeAndVerifyDiskState(self.op.disk_state,
6566 self.node.disk_state_static)
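# Illustrative addition: the role derivation above collapses three boolean
# flags into one new role, preferring (in order) candidate, drained, offline.
# A standalone sketch of the same precedence rule, with string role names
# standing in for the real _ROLE_* constants:
def _ExampleDeriveRole(master_candidate, drained, offline, old_role):
  if master_candidate:
    return "candidate"
  elif drained:
    return "drained"
  elif offline:
    return "offline"
  elif False in (master_candidate, drained, offline):
    return "regular"  # a flag was explicitly unset
  return old_role     # nothing requested, keep the old role

assert _ExampleDeriveRole(None, True, None, "regular") == "drained"
assert _ExampleDeriveRole(None, None, None, "offline") == "offline"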
6568 def Exec(self, feedback_fn):
6569 """Modifies a node.
6571 """
6572 node = self.node
6573 old_role = self.old_role
6574 new_role = self.new_role
6576 result = []
6578 if self.op.ndparams:
6579 node.ndparams = self.new_ndparams
6581 if self.op.powered is not None:
6582 node.powered = self.op.powered
6584 if self.op.hv_state:
6585 node.hv_state_static = self.new_hv_state
6587 if self.op.disk_state:
6588 node.disk_state_static = self.new_disk_state
6590 for attr in ["master_capable", "vm_capable"]:
6591 val = getattr(self.op, attr)
6592 if val is not None:
6593 setattr(node, attr, val)
6594 result.append((attr, str(val)))
6596 if new_role != old_role:
6597 # Tell the node to demote itself, if no longer MC and not offline
6598 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6599 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6600 if msg:
6601 self.LogWarning("Node failed to demote itself: %s", msg)
6603 new_flags = self._R2F[new_role]
6604 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6605 if of != nf:
6606 result.append((desc, str(nf)))
6607 (node.master_candidate, node.drained, node.offline) = new_flags
6609 # we locked all nodes, we adjust the CP before updating this node
6610 if self.lock_all:
6611 _AdjustCandidatePool(self, [node.name])
6613 if self.op.secondary_ip:
6614 node.secondary_ip = self.op.secondary_ip
6615 result.append(("secondary_ip", self.op.secondary_ip))
6617 # this will trigger configuration file update, if needed
6618 self.cfg.Update(node, feedback_fn)
6620 # this will trigger job queue propagation or cleanup if the mc
6621 # flag is modified
6622 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6623 self.context.ReaddNode(node)
6625 return result
6628 class LUNodePowercycle(NoHooksLU):
6629 """Powercycles a node.
6634 def CheckArguments(self):
6635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6636 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6637 raise errors.OpPrereqError("The node is the master and the force"
6638 " parameter was not set",
6641 def ExpandNames(self):
6642 """Locking for PowercycleNode.
6644 This is a last-resort option and shouldn't block on other
6645 jobs. Therefore, we grab no locks.
6647 """
6648 self.needed_locks = {}
6650 def Exec(self, feedback_fn):
6651 """Reboots a node.
6653 """
6654 result = self.rpc.call_node_powercycle(self.op.node_name,
6655 self.cfg.GetHypervisorType())
6656 result.Raise("Failed to schedule the reboot")
6657 return result.payload
6660 class LUClusterQuery(NoHooksLU):
6661 """Query cluster configuration.
6666 def ExpandNames(self):
6667 self.needed_locks = {}
6669 def Exec(self, feedback_fn):
6670 """Return cluster config.
6673 cluster = self.cfg.GetClusterInfo()
6676 # Filter just for enabled hypervisors
6677 for os_name, hv_dict in cluster.os_hvp.items():
6678 os_hvp[os_name] = {}
6679 for hv_name, hv_params in hv_dict.items():
6680 if hv_name in cluster.enabled_hypervisors:
6681 os_hvp[os_name][hv_name] = hv_params
6683 # Convert ip_family to ip_version
6684 primary_ip_version = constants.IP4_VERSION
6685 if cluster.primary_ip_family == netutils.IP6Address.family:
6686 primary_ip_version = constants.IP6_VERSION
6688 result = {
6689 "software_version": constants.RELEASE_VERSION,
6690 "protocol_version": constants.PROTOCOL_VERSION,
6691 "config_version": constants.CONFIG_VERSION,
6692 "os_api_version": max(constants.OS_API_VERSIONS),
6693 "export_version": constants.EXPORT_VERSION,
6694 "architecture": runtime.GetArchInfo(),
6695 "name": cluster.cluster_name,
6696 "master": cluster.master_node,
6697 "default_hypervisor": cluster.primary_hypervisor,
6698 "enabled_hypervisors": cluster.enabled_hypervisors,
6699 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6700 for hypervisor_name in cluster.enabled_hypervisors]),
6701 "os_hvp": os_hvp,
6702 "beparams": cluster.beparams,
6703 "osparams": cluster.osparams,
6704 "ipolicy": cluster.ipolicy,
6705 "nicparams": cluster.nicparams,
6706 "ndparams": cluster.ndparams,
6707 "diskparams": cluster.diskparams,
6708 "candidate_pool_size": cluster.candidate_pool_size,
6709 "master_netdev": cluster.master_netdev,
6710 "master_netmask": cluster.master_netmask,
6711 "use_external_mip_script": cluster.use_external_mip_script,
6712 "volume_group_name": cluster.volume_group_name,
6713 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6714 "file_storage_dir": cluster.file_storage_dir,
6715 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6716 "maintain_node_health": cluster.maintain_node_health,
6717 "ctime": cluster.ctime,
6718 "mtime": cluster.mtime,
6719 "uuid": cluster.uuid,
6720 "tags": list(cluster.GetTags()),
6721 "uid_pool": cluster.uid_pool,
6722 "default_iallocator": cluster.default_iallocator,
6723 "reserved_lvs": cluster.reserved_lvs,
6724 "primary_ip_version": primary_ip_version,
6725 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6726 "hidden_os": cluster.hidden_os,
6727 "blacklisted_os": cluster.blacklisted_os,
6733 class LUClusterConfigQuery(NoHooksLU):
6734 """Return configuration values.
6739 def CheckArguments(self):
6740 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6742 def ExpandNames(self):
6743 self.cq.ExpandNames(self)
6745 def DeclareLocks(self, level):
6746 self.cq.DeclareLocks(self, level)
6748 def Exec(self, feedback_fn):
6749 result = self.cq.OldStyleQuery(self)
6751 assert len(result) == 1
6753 return result[0]
6756 class _ClusterQuery(_QueryBase):
6757 FIELDS = query.CLUSTER_FIELDS
6759 #: Do not sort (there is only one item)
6760 SORT_FIELD = None
6762 def ExpandNames(self, lu):
6763 lu.needed_locks = {}
6765 # The following variables interact with _QueryBase._GetNames
6766 self.wanted = locking.ALL_SET
6767 self.do_locking = self.use_locking
6769 if self.do_locking:
6770 raise errors.OpPrereqError("Can not use locking for cluster queries",
6771 errors.ECODE_INVAL)
6773 def DeclareLocks(self, lu, level):
6774 pass
6776 def _GetQueryData(self, lu):
6777 """Computes the list of nodes and their attributes.
6780 # Locking is not used
6781 assert not (compat.any(lu.glm.is_owned(level)
6782 for level in locking.LEVELS
6783 if level != locking.LEVEL_CLUSTER) or
6784 self.do_locking or self.use_locking)
6786 if query.CQ_CONFIG in self.requested_data:
6787 cluster = lu.cfg.GetClusterInfo()
6788 else:
6789 cluster = NotImplemented
6791 if query.CQ_QUEUE_DRAINED in self.requested_data:
6792 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6793 else:
6794 drain_flag = NotImplemented
6796 if query.CQ_WATCHER_PAUSE in self.requested_data:
6797 master_name = lu.cfg.GetMasterNode()
6799 result = lu.rpc.call_get_watcher_pause(master_name)
6800 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6801 master_name)
6803 watcher_pause = result.payload
6804 else:
6805 watcher_pause = NotImplemented
6807 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6810 class LUInstanceActivateDisks(NoHooksLU):
6811 """Bring up an instance's disks.
6816 def ExpandNames(self):
6817 self._ExpandAndLockInstance()
6818 self.needed_locks[locking.LEVEL_NODE] = []
6819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6821 def DeclareLocks(self, level):
6822 if level == locking.LEVEL_NODE:
6823 self._LockInstancesNodes()
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6830 """
6831 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6834 _CheckNodeOnline(self, self.instance.primary_node)
6836 def Exec(self, feedback_fn):
6837 """Activate the disks.
6840 disks_ok, disks_info = \
6841 _AssembleInstanceDisks(self, self.instance,
6842 ignore_size=self.op.ignore_size)
6844 raise errors.OpExecError("Cannot activate block devices")
6846 if self.op.wait_for_sync:
6847 if not _WaitForSync(self, self.instance):
6848 raise errors.OpExecError("Some disks of the instance are degraded!")
6853 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6854 ignore_size=False):
6855 """Prepare the block devices for an instance.
6857 This sets up the block devices on all nodes.
6859 @type lu: L{LogicalUnit}
6860 @param lu: the logical unit on whose behalf we execute
6861 @type instance: L{objects.Instance}
6862 @param instance: the instance for whose disks we assemble
6863 @type disks: list of L{objects.Disk} or None
6864 @param disks: which disks to assemble (or all, if None)
6865 @type ignore_secondaries: boolean
6866 @param ignore_secondaries: if true, errors on secondary nodes
6867 won't result in an error return from the function
6868 @type ignore_size: boolean
6869 @param ignore_size: if true, the current known size of the disk
6870 will not be used during the disk activation, useful for cases
6871 when the size is wrong
6872 @return: False if the operation failed, otherwise a list of
6873 (host, instance_visible_name, node_visible_name)
6874 with the mapping from node devices to instance devices
6876 """
6877 device_info = []
6878 disks_ok = True
6879 iname = instance.name
6880 disks = _ExpandCheckDisks(instance, disks)
6882 # With the two passes mechanism we try to reduce the window of
6883 # opportunity for the race condition of switching DRBD to primary
6884 # before handshaking occurred, but we do not eliminate it
6886 # The proper fix would be to wait (with some limits) until the
6887 # connection has been made and drbd transitions from WFConnection
6888 # into any other network-connected state (Connected, SyncTarget,
6889 # SyncSource, etc.)
6891 # 1st pass, assemble on all nodes in secondary mode
6892 for idx, inst_disk in enumerate(disks):
6893 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6894 if ignore_size:
6895 node_disk = node_disk.Copy()
6896 node_disk.UnsetSize()
6897 lu.cfg.SetDiskID(node_disk, node)
6898 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6899 False, idx)
6900 msg = result.fail_msg
6901 if msg:
6902 is_offline_secondary = (node in instance.secondary_nodes and
6903 result.offline)
6904 lu.LogWarning("Could not prepare block device %s on node %s"
6905 " (is_primary=False, pass=1): %s",
6906 inst_disk.iv_name, node, msg)
6907 if not (ignore_secondaries or is_offline_secondary):
6908 disks_ok = False
6910 # FIXME: race condition on drbd migration to primary
6912 # 2nd pass, do only the primary node
6913 for idx, inst_disk in enumerate(disks):
6914 dev_path = None
6916 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6917 if node != instance.primary_node:
6918 continue
6919 if ignore_size:
6920 node_disk = node_disk.Copy()
6921 node_disk.UnsetSize()
6922 lu.cfg.SetDiskID(node_disk, node)
6923 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6924 True, idx)
6925 msg = result.fail_msg
6926 if msg:
6927 lu.LogWarning("Could not prepare block device %s on node %s"
6928 " (is_primary=True, pass=2): %s",
6929 inst_disk.iv_name, node, msg)
6930 disks_ok = False
6931 else:
6932 dev_path = result.payload
6934 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6936 # leave the disks configured for the primary node
6937 # this is a workaround that would be fixed better by
6938 # improving the logical/physical id handling
6939 for disk in disks:
6940 lu.cfg.SetDiskID(disk, instance.primary_node)
6942 return disks_ok, device_info
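# Illustrative addition: the two passes above are two filtered walks over
# the same node tree, secondaries first and the primary last, to narrow the
# DRBD handshake race described in the comments. A sketch of the ordering
# trick with plain data (node names are hypothetical):
def _ExampleTwoPassOrder(nodes, primary):
  """Return nodes with every secondary before the primary."""
  return ([n for n in nodes if n != primary] +
          [n for n in nodes if n == primary])

assert _ExampleTwoPassOrder(["n2", "n1", "n3"], "n1") == ["n2", "n3", "n1"]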
6945 def _StartInstanceDisks(lu, instance, force):
6946 """Start the disks of an instance.
6949 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6950 ignore_secondaries=force)
6952 _ShutdownInstanceDisks(lu, instance)
6953 if force is not None and not force:
6955 hint=("If the message above refers to a secondary node,"
6956 " you can retry the operation using '--force'"))
6957 raise errors.OpExecError("Disk consistency error")
6960 class LUInstanceDeactivateDisks(NoHooksLU):
6961 """Shutdown an instance's disks.
6966 def ExpandNames(self):
6967 self._ExpandAndLockInstance()
6968 self.needed_locks[locking.LEVEL_NODE] = []
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6971 def DeclareLocks(self, level):
6972 if level == locking.LEVEL_NODE:
6973 self._LockInstancesNodes()
6975 def CheckPrereq(self):
6976 """Check prerequisites.
6978 This checks that the instance is in the cluster.
6980 """
6981 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6982 assert self.instance is not None, \
6983 "Cannot retrieve locked instance %s" % self.op.instance_name
6985 def Exec(self, feedback_fn):
6986 """Deactivate the disks
6989 instance = self.instance
6991 _ShutdownInstanceDisks(self, instance)
6993 _SafeShutdownInstanceDisks(self, instance)
6996 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6997 """Shutdown block devices of an instance.
6999 This function checks if an instance is running, before calling
7000 _ShutdownInstanceDisks.
7003 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7004 _ShutdownInstanceDisks(lu, instance, disks=disks)
7007 def _ExpandCheckDisks(instance, disks):
7008 """Return the instance disks selected by the disks list
7010 @type disks: list of L{objects.Disk} or None
7011 @param disks: selected disks
7012 @rtype: list of L{objects.Disk}
7013 @return: selected instance disks to act on
7015 """
7016 if disks is None:
7017 return instance.disks
7018 else:
7019 if not set(disks).issubset(instance.disks):
7020 raise errors.ProgrammerError("Can only act on disks belonging to the"
7021 " target instance")
7022 return disks
7025 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7026 """Shutdown block devices of an instance.
7028 This does the shutdown on all nodes of the instance.
7030 If the ignore_primary is false, errors on the primary node are
7031 ignored.
7033 """
7034 all_result = True
7035 disks = _ExpandCheckDisks(instance, disks)
7037 for disk in disks:
7038 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7039 lu.cfg.SetDiskID(top_disk, node)
7040 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7041 msg = result.fail_msg
7042 if msg:
7043 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7044 disk.iv_name, node, msg)
7045 if ((node == instance.primary_node and not ignore_primary) or
7046 (node != instance.primary_node and not result.offline)):
7047 all_result = False
7049 return all_result
7051 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7052 """Checks if a node has enough free memory.
7054 This function checks if a given node has the needed amount of free
7055 memory. In case the node has less memory or we cannot get the
7056 information from the node, this function raises an OpPrereqError
7057 exception.
7059 @type lu: C{LogicalUnit}
7060 @param lu: a logical unit from which we get configuration data
7061 @type node: C{str}
7062 @param node: the node to check
7063 @type reason: C{str}
7064 @param reason: string to use in the error message
7065 @type requested: C{int}
7066 @param requested: the amount of memory in MiB to check for
7067 @type hypervisor_name: C{str}
7068 @param hypervisor_name: the hypervisor to ask for memory stats
7069 @rtype: C{int}
7070 @return: node current free memory
7071 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7072 we cannot check the node
7074 """
7075 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7076 nodeinfo[node].Raise("Can't get data from node %s" % node,
7077 prereq=True, ecode=errors.ECODE_ENVIRON)
7078 (_, _, (hv_info, )) = nodeinfo[node].payload
7080 free_mem = hv_info.get("memory_free", None)
7081 if not isinstance(free_mem, int):
7082 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7083 " was '%s'" % (node, free_mem),
7084 errors.ECODE_ENVIRON)
7085 if requested > free_mem:
7086 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7087 " needed %s MiB, available %s MiB" %
7088 (node, reason, requested, free_mem),
7089 errors.ECODE_NORES)
7090 return free_mem
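# Illustrative addition: the free-memory check boils down to a type check
# plus a single comparison once the hypervisor has reported its numbers.
# Sketch with hypothetical values:
def _ExampleHasEnoughMemory(free_mem_mib, requested_mib):
  if not isinstance(free_mem_mib, int):
    raise TypeError("node did not report an integer free-memory value")
  return requested_mib <= free_mem_mib

assert _ExampleHasEnoughMemory(4096, 1024)
assert not _ExampleHasEnoughMemory(512, 1024)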
7093 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7094 """Checks if nodes have enough free disk space in all the VGs.
7096 This function checks if all given nodes have the needed amount of
7097 free disk. In case any node has less disk or we cannot get the
7098 information from the node, this function raises an OpPrereqError
7099 exception.
7101 @type lu: C{LogicalUnit}
7102 @param lu: a logical unit from which we get configuration data
7103 @type nodenames: C{list}
7104 @param nodenames: the list of node names to check
7105 @type req_sizes: C{dict}
7106 @param req_sizes: the hash of vg and corresponding amount of disk in
7107 MiB to check for
7108 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7109 or we cannot check the node
7111 """
7112 for vg, req_size in req_sizes.items():
7113 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7116 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7117 """Checks if nodes have enough free disk space in the specified VG.
7119 This function checks if all given nodes have the needed amount of
7120 free disk. In case any node has less disk or we cannot get the
7121 information from the node, this function raises an OpPrereqError
7122 exception.
7124 @type lu: C{LogicalUnit}
7125 @param lu: a logical unit from which we get configuration data
7126 @type nodenames: C{list}
7127 @param nodenames: the list of node names to check
7128 @type vg: C{str}
7129 @param vg: the volume group to check
7130 @type requested: C{int}
7131 @param requested: the amount of disk in MiB to check for
7132 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7133 or we cannot check the node
7135 """
7136 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7137 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7138 for node in nodenames:
7139 info = nodeinfo[node]
7140 info.Raise("Cannot get current information from node %s" % node,
7141 prereq=True, ecode=errors.ECODE_ENVIRON)
7142 (_, (vg_info, ), _) = info.payload
7143 vg_free = vg_info.get("vg_free", None)
7144 if not isinstance(vg_free, int):
7145 raise errors.OpPrereqError("Can't compute free disk space on node"
7146 " %s for vg %s, result was '%s'" %
7147 (node, vg, vg_free), errors.ECODE_ENVIRON)
7148 if requested > vg_free:
7149 raise errors.OpPrereqError("Not enough disk space on target node %s"
7150 " vg %s: required %d MiB, available %d MiB" %
7151 (node, vg, requested, vg_free),
7152 errors.ECODE_NORES)
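# Illustrative addition: both disk checks above follow the same shape:
# fetch per-node info, validate the reported number, compare against the
# request. Condensed sketch (vg_free is the hypothetical reported value
# in MiB):
def _ExampleCheckVgFree(node, vg, vg_free, requested):
  if not isinstance(vg_free, int):
    raise ValueError("Can't compute free disk space on node %s for vg %s:"
                     " got %r" % (node, vg, vg_free))
  if requested > vg_free:
    raise ValueError("Not enough disk space on node %s vg %s:"
                     " required %d MiB, available %d MiB" %
                     (node, vg, requested, vg_free))

_ExampleCheckVgFree("node1", "xenvg", 20480, 1024)  # passes silently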
7155 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7156 """Checks if nodes have enough physical CPUs
7158 This function checks if all given nodes have the needed number of
7159 physical CPUs. In case any node has less CPUs or we cannot get the
7160 information from the node, this function raises an OpPrereqError
7161 exception.
7163 @type lu: C{LogicalUnit}
7164 @param lu: a logical unit from which we get configuration data
7165 @type nodenames: C{list}
7166 @param nodenames: the list of node names to check
7167 @type requested: C{int}
7168 @param requested: the minimum acceptable number of physical CPUs
7169 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7170 or we cannot check the node
7172 """
7173 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7174 for node in nodenames:
7175 info = nodeinfo[node]
7176 info.Raise("Cannot get current information from node %s" % node,
7177 prereq=True, ecode=errors.ECODE_ENVIRON)
7178 (_, _, (hv_info, )) = info.payload
7179 num_cpus = hv_info.get("cpu_total", None)
7180 if not isinstance(num_cpus, int):
7181 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7182 " on node %s, result was '%s'" %
7183 (node, num_cpus), errors.ECODE_ENVIRON)
7184 if requested > num_cpus:
7185 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7186 "required" % (node, num_cpus, requested),
7190 class LUInstanceStartup(LogicalUnit):
7191 """Starts an instance.
7194 HPATH = "instance-start"
7195 HTYPE = constants.HTYPE_INSTANCE
7198 def CheckArguments(self):
7199 # extra beparams
7200 if self.op.beparams:
7201 # fill the beparams dict
7202 objects.UpgradeBeParams(self.op.beparams)
7203 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7205 def ExpandNames(self):
7206 self._ExpandAndLockInstance()
7207 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7209 def DeclareLocks(self, level):
7210 if level == locking.LEVEL_NODE_RES:
7211 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7213 def BuildHooksEnv(self):
7214 """Build hooks env.
7216 This runs on master, primary and secondary nodes of the instance.
7218 """
7219 env = {
7220 "FORCE": self.op.force,
7221 }
7223 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7225 return env
7227 def BuildHooksNodes(self):
7228 """Build hooks nodes.
7231 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7234 def CheckPrereq(self):
7235 """Check prerequisites.
7237 This checks that the instance is in the cluster.
7239 """
7240 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7241 assert self.instance is not None, \
7242 "Cannot retrieve locked instance %s" % self.op.instance_name
7244 # extra hvparams
7245 if self.op.hvparams:
7246 # check hypervisor parameter syntax (locally)
7247 cluster = self.cfg.GetClusterInfo()
7248 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7249 filled_hvp = cluster.FillHV(instance)
7250 filled_hvp.update(self.op.hvparams)
7251 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7252 hv_type.CheckParameterSyntax(filled_hvp)
7253 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7255 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7257 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7259 if self.primary_offline and self.op.ignore_offline_nodes:
7260 self.LogWarning("Ignoring offline primary node")
7262 if self.op.hvparams or self.op.beparams:
7263 self.LogWarning("Overridden parameters are ignored")
7264 else:
7265 _CheckNodeOnline(self, instance.primary_node)
7267 bep = self.cfg.GetClusterInfo().FillBE(instance)
7268 bep.update(self.op.beparams)
7270 # check bridges existence
7271 _CheckInstanceBridgesExist(self, instance)
7273 remote_info = self.rpc.call_instance_info(instance.primary_node,
7274 instance.name,
7275 instance.hypervisor)
7276 remote_info.Raise("Error checking node %s" % instance.primary_node,
7277 prereq=True, ecode=errors.ECODE_ENVIRON)
7278 if not remote_info.payload: # not running already
7279 _CheckNodeFreeMemory(self, instance.primary_node,
7280 "starting instance %s" % instance.name,
7281 bep[constants.BE_MINMEM], instance.hypervisor)
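# Illustrative addition: overridden hypervisor parameters are validated by
# filling the cluster defaults first and layering the one-off overrides on
# top, as done above with FillHV + update. The layering in isolation, with
# made-up parameter names:
def _ExampleFillParams(defaults, overrides):
  filled = dict(defaults)
  filled.update(overrides)
  return filled

assert _ExampleFillParams({"acpi": True, "boot_order": "cd"},
                          {"boot_order": "n"}) == \
  {"acpi": True, "boot_order": "n"}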
7283 def Exec(self, feedback_fn):
7284 """Start the instance.
7287 instance = self.instance
7288 force = self.op.force
7290 if not self.op.no_remember:
7291 self.cfg.MarkInstanceUp(instance.name)
7293 if self.primary_offline:
7294 assert self.op.ignore_offline_nodes
7295 self.LogInfo("Primary node offline, marked instance as started")
7296 else:
7297 node_current = instance.primary_node
7299 _StartInstanceDisks(self, instance, force)
7301 result = \
7302 self.rpc.call_instance_start(node_current,
7303 (instance, self.op.hvparams,
7304 self.op.beparams),
7305 self.op.startup_paused)
7306 msg = result.fail_msg
7307 if msg:
7308 _ShutdownInstanceDisks(self, instance)
7309 raise errors.OpExecError("Could not start instance: %s" % msg)
7312 class LUInstanceReboot(LogicalUnit):
7313 """Reboot an instance.
7316 HPATH = "instance-reboot"
7317 HTYPE = constants.HTYPE_INSTANCE
7320 def ExpandNames(self):
7321 self._ExpandAndLockInstance()
7323 def BuildHooksEnv(self):
7324 """Build hooks env.
7326 This runs on master, primary and secondary nodes of the instance.
7328 """
7329 env = {
7330 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7331 "REBOOT_TYPE": self.op.reboot_type,
7332 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7333 }
7335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7337 return env
7339 def BuildHooksNodes(self):
7340 """Build hooks nodes.
7343 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7346 def CheckPrereq(self):
7347 """Check prerequisites.
7349 This checks that the instance is in the cluster.
7351 """
7352 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7353 assert self.instance is not None, \
7354 "Cannot retrieve locked instance %s" % self.op.instance_name
7355 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7356 _CheckNodeOnline(self, instance.primary_node)
7358 # check bridges existence
7359 _CheckInstanceBridgesExist(self, instance)
7361 def Exec(self, feedback_fn):
7362 """Reboot the instance.
7365 instance = self.instance
7366 ignore_secondaries = self.op.ignore_secondaries
7367 reboot_type = self.op.reboot_type
7369 remote_info = self.rpc.call_instance_info(instance.primary_node,
7370 instance.name,
7371 instance.hypervisor)
7372 remote_info.Raise("Error checking node %s" % instance.primary_node)
7373 instance_running = bool(remote_info.payload)
7375 node_current = instance.primary_node
7377 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7378 constants.INSTANCE_REBOOT_HARD]:
7379 for disk in instance.disks:
7380 self.cfg.SetDiskID(disk, node_current)
7381 result = self.rpc.call_instance_reboot(node_current, instance,
7382 reboot_type,
7383 self.op.shutdown_timeout)
7384 result.Raise("Could not reboot instance")
7385 else:
7386 if instance_running:
7387 result = self.rpc.call_instance_shutdown(node_current, instance,
7388 self.op.shutdown_timeout)
7389 result.Raise("Could not shutdown instance for full reboot")
7390 _ShutdownInstanceDisks(self, instance)
7391 else:
7392 self.LogInfo("Instance %s was already stopped, starting now",
7393 instance.name)
7394 _StartInstanceDisks(self, instance, ignore_secondaries)
7395 result = self.rpc.call_instance_start(node_current,
7396 (instance, None, None), False)
7397 msg = result.fail_msg
7398 if msg:
7399 _ShutdownInstanceDisks(self, instance)
7400 raise errors.OpExecError("Could not start instance for"
7401 " full reboot: %s" % msg)
7403 self.cfg.MarkInstanceUp(instance.name)
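# Illustrative addition: the reboot handling above distinguishes soft/hard
# reboots (delegated to the hypervisor on the node) from full reboots
# (shutdown, then start, driven by the master). A sketch of the dispatch,
# with plain strings standing in for the real constants:
def _ExampleRebootPlan(reboot_type, instance_running):
  if instance_running and reboot_type in ("soft", "hard"):
    return ["reboot"]
  plan = ["shutdown", "stop-disks"] if instance_running else []
  return plan + ["start-disks", "start"]

assert _ExampleRebootPlan("soft", True) == ["reboot"]
assert _ExampleRebootPlan("full", False) == ["start-disks", "start"]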
7406 class LUInstanceShutdown(LogicalUnit):
7407 """Shutdown an instance.
7410 HPATH = "instance-stop"
7411 HTYPE = constants.HTYPE_INSTANCE
7414 def ExpandNames(self):
7415 self._ExpandAndLockInstance()
7417 def BuildHooksEnv(self):
7418 """Build hooks env.
7420 This runs on master, primary and secondary nodes of the instance.
7422 """
7423 env = _BuildInstanceHookEnvByObject(self, self.instance)
7424 env["TIMEOUT"] = self.op.timeout
7425 return env
7427 def BuildHooksNodes(self):
7428 """Build hooks nodes.
7431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7434 def CheckPrereq(self):
7435 """Check prerequisites.
7437 This checks that the instance is in the cluster.
7439 """
7440 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7441 assert self.instance is not None, \
7442 "Cannot retrieve locked instance %s" % self.op.instance_name
7444 if not self.op.force:
7445 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7446 else:
7447 self.LogWarning("Ignoring offline instance check")
7449 self.primary_offline = \
7450 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7452 if self.primary_offline and self.op.ignore_offline_nodes:
7453 self.LogWarning("Ignoring offline primary node")
7454 else:
7455 _CheckNodeOnline(self, self.instance.primary_node)
7457 def Exec(self, feedback_fn):
7458 """Shutdown the instance.
7461 instance = self.instance
7462 node_current = instance.primary_node
7463 timeout = self.op.timeout
7465 # If the instance is offline we shouldn't mark it as down, as that
7466 # resets the offline flag.
7467 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7468 self.cfg.MarkInstanceDown(instance.name)
7470 if self.primary_offline:
7471 assert self.op.ignore_offline_nodes
7472 self.LogInfo("Primary node offline, marked instance as stopped")
7473 else:
7474 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7475 msg = result.fail_msg
7476 if msg:
7477 self.LogWarning("Could not shutdown instance: %s", msg)
7479 _ShutdownInstanceDisks(self, instance)
7482 class LUInstanceReinstall(LogicalUnit):
7483 """Reinstall an instance.
7486 HPATH = "instance-reinstall"
7487 HTYPE = constants.HTYPE_INSTANCE
7490 def ExpandNames(self):
7491 self._ExpandAndLockInstance()
7493 def BuildHooksEnv(self):
7496 This runs on master, primary and secondary nodes of the instance.
7499 return _BuildInstanceHookEnvByObject(self, self.instance)
7501 def BuildHooksNodes(self):
7502 """Build hooks nodes.
7505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7506 return (nl, nl)
7508 def CheckPrereq(self):
7509 """Check prerequisites.
7511 This checks that the instance is in the cluster and is not running.
7514 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7515 assert instance is not None, \
7516 "Cannot retrieve locked instance %s" % self.op.instance_name
7517 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7518 " offline, cannot reinstall")
7520 if instance.disk_template == constants.DT_DISKLESS:
7521 raise errors.OpPrereqError("Instance '%s' has no disks" %
7522 self.op.instance_name,
7524 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7526 if self.op.os_type is not None:
7528 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7529 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7530 instance_os = self.op.os_type
7531 else:
7532 instance_os = instance.os
7534 nodelist = list(instance.all_nodes)
7536 if self.op.osparams:
7537 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7538 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7539 self.os_inst = i_osdict # the new dict (without defaults)
7540 else:
7541 self.os_inst = {}
7543 self.instance = instance
7545 def Exec(self, feedback_fn):
7546 """Reinstall the instance.
7549 inst = self.instance
7551 if self.op.os_type is not None:
7552 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7553 inst.os = self.op.os_type
7554 # Write to configuration
7555 self.cfg.Update(inst, feedback_fn)
7557 _StartInstanceDisks(self, inst, None)
7559 feedback_fn("Running the instance OS create scripts...")
7560 # FIXME: pass debug option from opcode to backend
7561 result = self.rpc.call_instance_os_add(inst.primary_node,
7562 (inst, self.os_inst), True,
7563 self.op.debug_level)
7564 result.Raise("Could not install OS for instance %s on node %s" %
7565 (inst.name, inst.primary_node))
7566 finally:
7567 _ShutdownInstanceDisks(self, inst)
7570 class LUInstanceRecreateDisks(LogicalUnit):
7571 """Recreate an instance's missing disks.
7574 HPATH = "instance-recreate-disks"
7575 HTYPE = constants.HTYPE_INSTANCE
7578 _MODIFYABLE = compat.UniqueFrozenset([
7579 constants.IDISK_SIZE,
7580 constants.IDISK_MODE,
7583 # New or changed disk parameters may have different semantics
7584 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7585 constants.IDISK_ADOPT,
7587 # TODO: Implement support changing VG while recreating
7589 constants.IDISK_METAVG,
7590 constants.IDISK_PROVIDER,
7593 def _RunAllocator(self):
7594 """Run the allocator based on input opcode.
7597 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7600 # The allocator should actually run in "relocate" mode, but current
7601 # allocators don't support relocating all the nodes of an instance at
7602 # the same time. As a workaround we use "allocate" mode, but this is
7603 # suboptimal for two reasons:
7604 # - The instance name passed to the allocator is present in the list of
7605 # existing instances, so there could be a conflict within the
7606 # internal structures of the allocator. This doesn't happen with the
7607 # current allocators, but it's a liability.
7608 # - The allocator counts the resources used by the instance twice: once
7609 # because the instance exists already, and once because it tries to
7610 # allocate a new instance.
7611 # The allocator could choose some of the nodes on which the instance is
7612 # running, but that's not a problem. If the instance nodes are broken,
7613 # they should already be marked as drained or offline, and hence
7614 # skipped by the allocator. If instance disks have been lost for other
7615 # reasons, then recreating the disks on the same nodes should be fine.
7616 disk_template = self.instance.disk_template
7617 spindle_use = be_full[constants.BE_SPINDLE_USE]
7618 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7619 disk_template=disk_template,
7620 tags=list(self.instance.GetTags()),
7621 os=self.instance.os,
7623 vcpus=be_full[constants.BE_VCPUS],
7624 memory=be_full[constants.BE_MAXMEM],
7625 spindle_use=spindle_use,
7626 disks=[{constants.IDISK_SIZE: d.size,
7627 constants.IDISK_MODE: d.mode}
7628 for d in self.instance.disks],
7629 hypervisor=self.instance.hypervisor,
7630 node_whitelist=None)
7631 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7633 ial.Run(self.op.iallocator)
7635 assert req.RequiredNodes() == len(self.instance.all_nodes)
7638 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7639 " %s" % (self.op.iallocator, ial.info),
7642 self.op.nodes = ial.result
7643 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7644 self.op.instance_name, self.op.iallocator,
7645 utils.CommaJoin(ial.result))
7647 def CheckArguments(self):
7648 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7649 # Normalize and convert deprecated list of disk indices
7650 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
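# For example, a deprecated self.op.disks of [2, 0] becomes
# [(0, {}), (2, {})]: (index, parameter-override) pairs with no changes.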
7652 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7654 raise errors.OpPrereqError("Some disks have been specified more than"
7655 " once: %s" % utils.CommaJoin(duplicates),
7658 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7659 # when neither iallocator nor nodes are specified
7660 if self.op.iallocator or self.op.nodes:
7661 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7663 for (idx, params) in self.op.disks:
7664 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7665 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7667 raise errors.OpPrereqError("Parameters for disk %s try to change"
7668 " unmodifyable parameter(s): %s" %
7669 (idx, utils.CommaJoin(unsupported)),
7672 def ExpandNames(self):
7673 self._ExpandAndLockInstance()
7674 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7676 if self.op.nodes:
7677 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7678 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7679 else:
7680 self.needed_locks[locking.LEVEL_NODE] = []
7681 if self.op.iallocator:
7682 # iallocator will select a new node in the same group
7683 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7684 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7686 self.needed_locks[locking.LEVEL_NODE_RES] = []
7688 def DeclareLocks(self, level):
7689 if level == locking.LEVEL_NODEGROUP:
7690 assert self.op.iallocator is not None
7691 assert not self.op.nodes
7692 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7693 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7694 # Lock the primary group used by the instance optimistically; this
7695 # requires going via the node before it's locked, requiring
7696 # verification later on
7697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7698 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7700 elif level == locking.LEVEL_NODE:
7701 # If an allocator is used, then we lock all the nodes in the current
7702 # instance group, as we don't know yet which ones will be selected;
7703 # if we replace the nodes without using an allocator, locks are
7704 # already declared in ExpandNames; otherwise, we need to lock all the
7705 # instance nodes for disk re-creation
7706 if self.op.iallocator:
7707 assert not self.op.nodes
7708 assert not self.needed_locks[locking.LEVEL_NODE]
7709 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7711 # Lock member nodes of the group of the primary node
7712 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7713 self.needed_locks[locking.LEVEL_NODE].extend(
7714 self.cfg.GetNodeGroup(group_uuid).members)
7716 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7717 elif not self.op.nodes:
7718 self._LockInstancesNodes(primary_only=False)
7719 elif level == locking.LEVEL_NODE_RES:
7721 self.needed_locks[locking.LEVEL_NODE_RES] = \
7722 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7724 def BuildHooksEnv(self):
7727 This runs on master, primary and secondary nodes of the instance.
7730 return _BuildInstanceHookEnvByObject(self, self.instance)
7732 def BuildHooksNodes(self):
7733 """Build hooks nodes.
7736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7739 def CheckPrereq(self):
7740 """Check prerequisites.
7742 This checks that the instance is in the cluster and is not running.
7745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7746 assert instance is not None, \
7747 "Cannot retrieve locked instance %s" % self.op.instance_name
7748 if self.op.nodes:
7749 if len(self.op.nodes) != len(instance.all_nodes):
7750 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7751 " %d replacement nodes were specified" %
7752 (instance.name, len(instance.all_nodes),
7753 len(self.op.nodes)),
7754 errors.ECODE_INVAL)
7755 assert instance.disk_template != constants.DT_DRBD8 or \
7756 len(self.op.nodes) == 2
7757 assert instance.disk_template != constants.DT_PLAIN or \
7758 len(self.op.nodes) == 1
7759 primary_node = self.op.nodes[0]
7760 else:
7761 primary_node = instance.primary_node
7762 if not self.op.iallocator:
7763 _CheckNodeOnline(self, primary_node)
7765 if instance.disk_template == constants.DT_DISKLESS:
7766 raise errors.OpPrereqError("Instance '%s' has no disks" %
7767 self.op.instance_name, errors.ECODE_INVAL)
7769 # Verify if node group locks are still correct
7770 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7771 if owned_groups:
7772 # Node group locks are acquired only for the primary node (and only
7773 # when the allocator is used)
7774 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7775 primary_only=True)
7777 # if we replace nodes *and* the old primary is offline, we don't
7778 # check the instance state
7779 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7780 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7781 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7782 msg="cannot recreate disks")
7784 if self.op.disks:
7785 self.disks = dict(self.op.disks)
7786 else:
7787 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7789 maxidx = max(self.disks.keys())
7790 if maxidx >= len(instance.disks):
7791 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7794 if ((self.op.nodes or self.op.iallocator) and
7795 sorted(self.disks.keys()) != range(len(instance.disks))):
7796 raise errors.OpPrereqError("Can't recreate disks partially and"
7797 " change the nodes at the same time",
7800 self.instance = instance
7802 if self.op.iallocator:
7803 self._RunAllocator()
7804 # Release unneeded node and node resource locks
7805 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7806 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7807 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7809 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7811 def Exec(self, feedback_fn):
7812 """Recreate the disks.
7815 instance = self.instance
7817 assert (self.owned_locks(locking.LEVEL_NODE) ==
7818 self.owned_locks(locking.LEVEL_NODE_RES))
7820 to_skip = []
7821 mods = [] # keeps track of needed changes
7823 for idx, disk in enumerate(instance.disks):
7824 try:
7825 changes = self.disks[idx]
7826 except KeyError:
7827 # Disk should not be recreated
7828 to_skip.append(idx)
7829 continue
7831 # update secondaries for disks, if needed
7832 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7833 # need to update the nodes and minors
7834 assert len(self.op.nodes) == 2
7835 assert len(disk.logical_id) == 6 # otherwise disk internals
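# A DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); only the node
# names and minors are replaced below, port and secret are kept.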
7837 (_, _, old_port, _, _, old_secret) = disk.logical_id
7838 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7839 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7840 new_minors[0], new_minors[1], old_secret)
7841 assert len(disk.logical_id) == len(new_id)
7842 else:
7843 new_id = None
7845 mods.append((idx, new_id, changes))
7847 # now that we have passed all asserts above, we can apply the mods
7848 # in a single run (to avoid partial changes)
7849 for idx, new_id, changes in mods:
7850 disk = instance.disks[idx]
7851 if new_id is not None:
7852 assert disk.dev_type == constants.LD_DRBD8
7853 disk.logical_id = new_id
7854 if changes:
7855 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7856 mode=changes.get(constants.IDISK_MODE, None))
7858 # change primary node, if needed
7859 if self.op.nodes:
7860 instance.primary_node = self.op.nodes[0]
7861 self.LogWarning("Changing the instance's nodes, you will have to"
7862 " remove any disks left on the older nodes manually")
7865 self.cfg.Update(instance, feedback_fn)
7867 # All touched nodes must be locked
7868 mylocks = self.owned_locks(locking.LEVEL_NODE)
7869 assert mylocks.issuperset(frozenset(instance.all_nodes))
7870 _CreateDisks(self, instance, to_skip=to_skip)
7873 class LUInstanceRename(LogicalUnit):
7874 """Rename an instance.
7877 HPATH = "instance-rename"
7878 HTYPE = constants.HTYPE_INSTANCE
7880 def CheckArguments(self):
7884 if self.op.ip_check and not self.op.name_check:
7885 # TODO: make the ip check more flexible and not depend on the name check
7886 raise errors.OpPrereqError("IP address check requires a name check",
7889 def BuildHooksEnv(self):
7892 This runs on master, primary and secondary nodes of the instance.
7895 env = _BuildInstanceHookEnvByObject(self, self.instance)
7896 env["INSTANCE_NEW_NAME"] = self.op.new_name
7899 def BuildHooksNodes(self):
7900 """Build hooks nodes.
7903 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7904 return (nl, nl)
7906 def CheckPrereq(self):
7907 """Check prerequisites.
7909 This checks that the instance is in the cluster and is not running.
7912 self.op.instance_name = _ExpandInstanceName(self.cfg,
7913 self.op.instance_name)
7914 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7915 assert instance is not None
7916 _CheckNodeOnline(self, instance.primary_node)
7917 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7918 msg="cannot rename")
7919 self.instance = instance
7921 new_name = self.op.new_name
7922 if self.op.name_check:
7923 hostname = _CheckHostnameSane(self, new_name)
7924 new_name = self.op.new_name = hostname.name
7925 if (self.op.ip_check and
7926 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7927 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7928 (hostname.ip, new_name),
7929 errors.ECODE_NOTUNIQUE)
7931 instance_list = self.cfg.GetInstanceList()
7932 if new_name in instance_list and new_name != instance.name:
7933 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7934 new_name, errors.ECODE_EXISTS)
7936 def Exec(self, feedback_fn):
7937 """Rename the instance.
7940 inst = self.instance
7941 old_name = inst.name
7943 rename_file_storage = False
7944 if (inst.disk_template in constants.DTS_FILEBASED and
7945 self.op.new_name != inst.name):
7946 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7947 rename_file_storage = True
7949 self.cfg.RenameInstance(inst.name, self.op.new_name)
7950 # Change the instance lock. This is definitely safe while we hold the BGL.
7951 # Otherwise the new lock would have to be added in acquired mode.
7953 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7954 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7955 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7957 # re-read the instance from the configuration after rename
7958 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7960 if rename_file_storage:
7961 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7963 old_file_storage_dir,
7964 new_file_storage_dir)
7965 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7966 " (but the instance has been renamed in Ganeti)" %
7967 (inst.primary_node, old_file_storage_dir,
7968 new_file_storage_dir))
7970 _StartInstanceDisks(self, inst, None)
7971 # update info on disks
7972 info = _GetInstanceInfoText(inst)
7973 for (idx, disk) in enumerate(inst.disks):
7974 for node in inst.all_nodes:
7975 self.cfg.SetDiskID(disk, node)
7976 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7977 if result.fail_msg:
7978 self.LogWarning("Error setting info on node %s for disk %s: %s",
7979 node, idx, result.fail_msg)
7980 try:
7981 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7982 old_name, self.op.debug_level)
7983 msg = result.fail_msg
7984 if msg:
7985 msg = ("Could not run OS rename script for instance %s on node %s"
7986 " (but the instance has been renamed in Ganeti): %s" %
7987 (inst.name, inst.primary_node, msg))
7988 self.LogWarning(msg)
7989 finally:
7990 _ShutdownInstanceDisks(self, inst)
7992 return inst.name
7995 class LUInstanceRemove(LogicalUnit):
7996 """Remove an instance.
7999 HPATH = "instance-remove"
8000 HTYPE = constants.HTYPE_INSTANCE
8003 def ExpandNames(self):
8004 self._ExpandAndLockInstance()
8005 self.needed_locks[locking.LEVEL_NODE] = []
8006 self.needed_locks[locking.LEVEL_NODE_RES] = []
8007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8009 def DeclareLocks(self, level):
8010 if level == locking.LEVEL_NODE:
8011 self._LockInstancesNodes()
8012 elif level == locking.LEVEL_NODE_RES:
8014 self.needed_locks[locking.LEVEL_NODE_RES] = \
8015 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8017 def BuildHooksEnv(self):
8020 This runs on master, primary and secondary nodes of the instance.
8023 env = _BuildInstanceHookEnvByObject(self, self.instance)
8024 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8027 def BuildHooksNodes(self):
8028 """Build hooks nodes.
8031 nl = [self.cfg.GetMasterNode()]
8032 nl_post = list(self.instance.all_nodes) + nl
8033 return (nl, nl_post)
8035 def CheckPrereq(self):
8036 """Check prerequisites.
8038 This checks that the instance is in the cluster.
8041 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8042 assert self.instance is not None, \
8043 "Cannot retrieve locked instance %s" % self.op.instance_name
8045 def Exec(self, feedback_fn):
8046 """Remove the instance.
8049 instance = self.instance
8050 logging.info("Shutting down instance %s on node %s",
8051 instance.name, instance.primary_node)
8053 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8054 self.op.shutdown_timeout)
8055 msg = result.fail_msg
8056 if msg:
8057 if self.op.ignore_failures:
8058 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8059 else:
8060 raise errors.OpExecError("Could not shutdown instance %s on"
8061 " node %s: %s" %
8062 (instance.name, instance.primary_node, msg))
8064 assert (self.owned_locks(locking.LEVEL_NODE) ==
8065 self.owned_locks(locking.LEVEL_NODE_RES))
8066 assert not (set(instance.all_nodes) -
8067 self.owned_locks(locking.LEVEL_NODE)), \
8068 "Not owning correct locks"
8070 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8073 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8074 """Utility function to remove an instance.
8077 logging.info("Removing block devices for instance %s", instance.name)
8079 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8080 if not ignore_failures:
8081 raise errors.OpExecError("Can't remove instance's disks")
8082 feedback_fn("Warning: can't remove instance's disks")
8084 logging.info("Removing instance %s out of cluster config", instance.name)
8086 lu.cfg.RemoveInstance(instance.name)
8088 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8089 "Instance lock removal conflict"
8091 # Remove lock for the instance
8092 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8095 class LUInstanceQuery(NoHooksLU):
8096 """Logical unit for querying instances.
8099 # pylint: disable=W0142
8102 def CheckArguments(self):
8103 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8104 self.op.output_fields, self.op.use_locking)
8106 def ExpandNames(self):
8107 self.iq.ExpandNames(self)
8109 def DeclareLocks(self, level):
8110 self.iq.DeclareLocks(self, level)
8112 def Exec(self, feedback_fn):
8113 return self.iq.OldStyleQuery(self)
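# A note on the query path: the filter built in CheckArguments is an OR of
# per-name equality tests on the "name" field; when no names are given, no
# filter is applied and all instances are returned.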
8116 def _ExpandNamesForMigration(lu):
8117 """Expands names for use with L{TLMigrateInstance}.
8119 @type lu: L{LogicalUnit}
8122 if lu.op.target_node is not None:
8123 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8125 lu.needed_locks[locking.LEVEL_NODE] = []
8126 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8128 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8129 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8131 # The node allocation lock is actually only needed for replicated instances
8132 # (e.g. DRBD8) and if an iallocator is used.
8133 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8136 def _DeclareLocksForMigration(lu, level):
8137 """Declares locks for L{TLMigrateInstance}.
8139 @type lu: L{LogicalUnit}
8140 @param level: Lock level
8143 if level == locking.LEVEL_NODE_ALLOC:
8144 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8146 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8148 # Node locks are already declared here rather than at LEVEL_NODE as we need
8149 # the instance object anyway to declare the node allocation lock.
8150 if instance.disk_template in constants.DTS_EXT_MIRROR:
8151 if lu.op.target_node is None:
8152 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8154 else:
8155 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8156 lu.op.target_node]
8157 del lu.recalculate_locks[locking.LEVEL_NODE]
8158 else:
8159 lu._LockInstancesNodes() # pylint: disable=W0212
8161 elif level == locking.LEVEL_NODE:
8162 # Node locks are declared together with the node allocation lock
8163 assert (lu.needed_locks[locking.LEVEL_NODE] or
8164 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8166 elif level == locking.LEVEL_NODE_RES:
8168 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8169 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8172 class LUInstanceFailover(LogicalUnit):
8173 """Failover an instance.
8176 HPATH = "instance-failover"
8177 HTYPE = constants.HTYPE_INSTANCE
8180 def CheckArguments(self):
8181 """Check the arguments.
8184 self.iallocator = getattr(self.op, "iallocator", None)
8185 self.target_node = getattr(self.op, "target_node", None)
8187 def ExpandNames(self):
8188 self._ExpandAndLockInstance()
8189 _ExpandNamesForMigration(self)
8191 self._migrater = \
8192 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8193 self.op.ignore_consistency, True,
8194 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8196 self.tasklets = [self._migrater]
8198 def DeclareLocks(self, level):
8199 _DeclareLocksForMigration(self, level)
8201 def BuildHooksEnv(self):
8204 This runs on master, primary and secondary nodes of the instance.
8207 instance = self._migrater.instance
8208 source_node = instance.primary_node
8209 target_node = self.op.target_node
8211 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8213 "OLD_PRIMARY": source_node,
8214 "NEW_PRIMARY": target_node,
8217 if instance.disk_template in constants.DTS_INT_MIRROR:
8218 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8219 env["NEW_SECONDARY"] = source_node
8220 else:
8221 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
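# For internally mirrored (DRBD) templates the failover swaps roles: the old
# secondary becomes the new primary and the old primary is expected to come
# back as the new secondary.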
8223 env.update(_BuildInstanceHookEnvByObject(self, instance))
8225 return env
8227 def BuildHooksNodes(self):
8228 """Build hooks nodes.
8231 instance = self._migrater.instance
8232 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8233 return (nl, nl + [instance.primary_node])
8236 class LUInstanceMigrate(LogicalUnit):
8237 """Migrate an instance.
8239 This is migration without shutting down, compared to the failover,
8240 which is done with shutdown.
8243 HPATH = "instance-migrate"
8244 HTYPE = constants.HTYPE_INSTANCE
8247 def ExpandNames(self):
8248 self._ExpandAndLockInstance()
8249 _ExpandNamesForMigration(self)
8251 self._migrater = \
8252 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8253 False, self.op.allow_failover, False,
8254 self.op.allow_runtime_changes,
8255 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8256 self.op.ignore_ipolicy)
8258 self.tasklets = [self._migrater]
8260 def DeclareLocks(self, level):
8261 _DeclareLocksForMigration(self, level)
8263 def BuildHooksEnv(self):
8266 This runs on master, primary and secondary nodes of the instance.
8269 instance = self._migrater.instance
8270 source_node = instance.primary_node
8271 target_node = self.op.target_node
8272 env = _BuildInstanceHookEnvByObject(self, instance)
8274 "MIGRATE_LIVE": self._migrater.live,
8275 "MIGRATE_CLEANUP": self.op.cleanup,
8276 "OLD_PRIMARY": source_node,
8277 "NEW_PRIMARY": target_node,
8278 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8281 if instance.disk_template in constants.DTS_INT_MIRROR:
8282 env["OLD_SECONDARY"] = target_node
8283 env["NEW_SECONDARY"] = source_node
8284 else:
8285 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8287 return env
8289 def BuildHooksNodes(self):
8290 """Build hooks nodes.
8293 instance = self._migrater.instance
8294 snodes = list(instance.secondary_nodes)
8295 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8296 return (nl, nl)
8299 class LUInstanceMove(LogicalUnit):
8300 """Move an instance by data-copying.
8303 HPATH = "instance-move"
8304 HTYPE = constants.HTYPE_INSTANCE
8307 def ExpandNames(self):
8308 self._ExpandAndLockInstance()
8309 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8310 self.op.target_node = target_node
8311 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8312 self.needed_locks[locking.LEVEL_NODE_RES] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes(primary_only=True)
8318 elif level == locking.LEVEL_NODE_RES:
8320 self.needed_locks[locking.LEVEL_NODE_RES] = \
8321 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8323 def BuildHooksEnv(self):
8326 This runs on master, primary and secondary nodes of the instance.
8330 "TARGET_NODE": self.op.target_node,
8331 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8333 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8336 def BuildHooksNodes(self):
8337 """Build hooks nodes.
8340 nl = [
8341 self.cfg.GetMasterNode(),
8342 self.instance.primary_node,
8343 self.op.target_node,
8344 ]
8345 return (nl, nl)
8347 def CheckPrereq(self):
8348 """Check prerequisites.
8350 This checks that the instance is in the cluster.
8353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8354 assert self.instance is not None, \
8355 "Cannot retrieve locked instance %s" % self.op.instance_name
8357 node = self.cfg.GetNodeInfo(self.op.target_node)
8358 assert node is not None, \
8359 "Cannot retrieve locked node %s" % self.op.target_node
8361 self.target_node = target_node = node.name
8363 if target_node == instance.primary_node:
8364 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8365 (instance.name, target_node),
8366 errors.ECODE_INVAL)
8368 bep = self.cfg.GetClusterInfo().FillBE(instance)
8370 for idx, dsk in enumerate(instance.disks):
8371 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8372 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8373 " cannot copy" % idx, errors.ECODE_STATE)
8375 _CheckNodeOnline(self, target_node)
8376 _CheckNodeNotDrained(self, target_node)
8377 _CheckNodeVmCapable(self, target_node)
8378 cluster = self.cfg.GetClusterInfo()
8379 group_info = self.cfg.GetNodeGroup(node.group)
8380 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8381 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8382 ignore=self.op.ignore_ipolicy)
8384 if instance.admin_state == constants.ADMINST_UP:
8385 # check memory requirements on the secondary node
8386 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8387 instance.name, bep[constants.BE_MAXMEM],
8388 instance.hypervisor)
8390 self.LogInfo("Not checking memory on the secondary node as"
8391 " instance will not be started")
8393 # check bridge existence
8394 _CheckInstanceBridgesExist(self, instance, node=target_node)
8396 def Exec(self, feedback_fn):
8397 """Move an instance.
8399 The move is done by shutting it down on its present node, copying
8400 the data over (slow) and starting it on the new node.
8403 instance = self.instance
8405 source_node = instance.primary_node
8406 target_node = self.target_node
8408 self.LogInfo("Shutting down instance %s on source node %s",
8409 instance.name, source_node)
8411 assert (self.owned_locks(locking.LEVEL_NODE) ==
8412 self.owned_locks(locking.LEVEL_NODE_RES))
8414 result = self.rpc.call_instance_shutdown(source_node, instance,
8415 self.op.shutdown_timeout)
8416 msg = result.fail_msg
8417 if msg:
8418 if self.op.ignore_consistency:
8419 self.LogWarning("Could not shutdown instance %s on node %s."
8420 " Proceeding anyway. Please make sure node"
8421 " %s is down. Error details: %s",
8422 instance.name, source_node, source_node, msg)
8424 raise errors.OpExecError("Could not shutdown instance %s on"
8426 (instance.name, source_node, msg))
8428 # create the target disks
8429 try:
8430 _CreateDisks(self, instance, target_node=target_node)
8431 except errors.OpExecError:
8432 self.LogWarning("Device creation failed, reverting...")
8433 try:
8434 _RemoveDisks(self, instance, target_node=target_node)
8435 finally:
8436 self.cfg.ReleaseDRBDMinors(instance.name)
8437 raise
8439 cluster_name = self.cfg.GetClusterInfo().cluster_name
8441 errs = []
8442 # activate, get path, copy the data over
8443 for idx, disk in enumerate(instance.disks):
8444 self.LogInfo("Copying data for disk %d", idx)
8445 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8446 instance.name, True, idx)
8448 self.LogWarning("Can't assemble newly created disk %d: %s",
8449 idx, result.fail_msg)
8450 errs.append(result.fail_msg)
8451 break
8452 dev_path = result.payload
8453 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8454 target_node, dev_path,
8457 self.LogWarning("Can't copy data over for disk %d: %s",
8458 idx, result.fail_msg)
8459 errs.append(result.fail_msg)
8460 break
8462 if errs:
8463 self.LogWarning("Some disks failed to copy, aborting")
8464 try:
8465 _RemoveDisks(self, instance, target_node=target_node)
8466 finally:
8467 self.cfg.ReleaseDRBDMinors(instance.name)
8468 raise errors.OpExecError("Errors during disk copy: %s" %
8469 (",".join(errs),))
8471 instance.primary_node = target_node
8472 self.cfg.Update(instance, feedback_fn)
8474 self.LogInfo("Removing the disks on the original node")
8475 _RemoveDisks(self, instance, target_node=source_node)
8477 # Only start the instance if it's marked as up
8478 if instance.admin_state == constants.ADMINST_UP:
8479 self.LogInfo("Starting instance %s on node %s",
8480 instance.name, target_node)
8482 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8483 ignore_secondaries=True)
8484 if not disks_ok:
8485 _ShutdownInstanceDisks(self, instance)
8486 raise errors.OpExecError("Can't activate the instance's disks")
8488 result = self.rpc.call_instance_start(target_node,
8489 (instance, None, None), False)
8490 msg = result.fail_msg
8491 if msg:
8492 _ShutdownInstanceDisks(self, instance)
8493 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8494 (instance.name, target_node, msg))
8497 class LUNodeMigrate(LogicalUnit):
8498 """Migrate all instances from a node.
8501 HPATH = "node-migrate"
8502 HTYPE = constants.HTYPE_NODE
8505 def CheckArguments(self):
8506 pass
8508 def ExpandNames(self):
8509 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8511 self.share_locks = _ShareAll()
8512 self.needed_locks = {
8513 locking.LEVEL_NODE: [self.op.node_name],
8516 def BuildHooksEnv(self):
8519 This runs on the master, the primary and all the secondaries.
8523 "NODE_NAME": self.op.node_name,
8524 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8527 def BuildHooksNodes(self):
8528 """Build hooks nodes.
8531 nl = [self.cfg.GetMasterNode()]
8532 return (nl, nl)
8534 def CheckPrereq(self):
8535 pass
8537 def Exec(self, feedback_fn):
8538 # Prepare jobs for migration instances
8539 allow_runtime_changes = self.op.allow_runtime_changes
8540 jobs = [
8541 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8542 mode=self.op.mode,
8543 live=self.op.live,
8544 iallocator=self.op.iallocator,
8545 target_node=self.op.target_node,
8546 allow_runtime_changes=allow_runtime_changes,
8547 ignore_ipolicy=self.op.ignore_ipolicy)]
8548 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8550 # TODO: Run iallocator in this opcode and pass correct placement options to
8551 # OpInstanceMigrate. Since other jobs can modify the cluster between
8552 # running the iallocator and the actual migration, a good consistency model
8553 # will have to be found.
8555 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8556 frozenset([self.op.node_name]))
8558 return ResultWithJobs(jobs)
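# Each inner list is one single-opcode job: one migration job per primary
# instance, so the job queue can schedule them independently, constrained
# only by the locks the individual jobs acquire.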
8561 class TLMigrateInstance(Tasklet):
8562 """Tasklet class for instance migration.
8565 @ivar live: whether the migration will be done live or non-live;
8566 this variable is initialized only after CheckPrereq has run
8567 @type cleanup: boolean
8568 @ivar cleanup: Whether we clean up after a failed migration
8569 @type iallocator: string
8570 @ivar iallocator: The iallocator used to determine target_node
8571 @type target_node: string
8572 @ivar target_node: If given, the target_node to reallocate the instance to
8573 @type failover: boolean
8574 @ivar failover: Whether operation results in failover or migration
8575 @type fallback: boolean
8576 @ivar fallback: Whether fallback to failover is allowed if migration is not
8577 possible
8578 @type ignore_consistency: boolean
8579 @ivar ignore_consistency: Whether we should ignore consistency between source
8580 and target node
8581 @type shutdown_timeout: int
8582 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8583 @type ignore_ipolicy: bool
8584 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8589 _MIGRATION_POLL_INTERVAL = 1 # seconds
8590 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8592 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8593 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8595 """Initializes this class.
8598 Tasklet.__init__(self, lu)
8601 self.instance_name = instance_name
8602 self.cleanup = cleanup
8603 self.live = False # will be overridden later
8604 self.failover = failover
8605 self.fallback = fallback
8606 self.ignore_consistency = ignore_consistency
8607 self.shutdown_timeout = shutdown_timeout
8608 self.ignore_ipolicy = ignore_ipolicy
8609 self.allow_runtime_changes = allow_runtime_changes
8611 def CheckPrereq(self):
8612 """Check prerequisites.
8614 This checks that the instance is in the cluster.
8617 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8618 instance = self.cfg.GetInstanceInfo(instance_name)
8619 assert instance is not None
8620 self.instance = instance
8621 cluster = self.cfg.GetClusterInfo()
8623 if (not self.cleanup and
8624 not instance.admin_state == constants.ADMINST_UP and
8625 not self.failover and self.fallback):
8626 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8627 " switching to failover")
8628 self.failover = True
8630 if instance.disk_template not in constants.DTS_MIRRORED:
8631 if self.failover:
8632 text = "failovers"
8633 else:
8634 text = "migrations"
8635 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8636 " %s" % (instance.disk_template, text),
8637 errors.ECODE_STATE)
8639 if instance.disk_template in constants.DTS_EXT_MIRROR:
8640 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8642 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8644 if self.lu.op.iallocator:
8645 self._RunAllocator()
8646 else:
8647 # We set self.target_node as it is required by
8648 # BuildHooksEnv
8649 self.target_node = self.lu.op.target_node
8651 # Check that the target node is correct in terms of instance policy
8652 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8653 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8654 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8655 group_info)
8656 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8657 ignore=self.ignore_ipolicy)
8659 # self.target_node is already populated, either directly or by the
8661 target_node = self.target_node
8662 if self.target_node == instance.primary_node:
8663 raise errors.OpPrereqError("Cannot migrate instance %s"
8664 " to its primary (%s)" %
8665 (instance.name, instance.primary_node),
8666 errors.ECODE_INVAL)
8668 if len(self.lu.tasklets) == 1:
8669 # It is safe to release locks only when we're the only tasklet
8671 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8672 keep=[instance.primary_node, self.target_node])
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8675 else:
8676 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8678 secondary_nodes = instance.secondary_nodes
8679 if not secondary_nodes:
8680 raise errors.ConfigurationError("No secondary node but using"
8681 " %s disk template" %
8682 instance.disk_template)
8683 target_node = secondary_nodes[0]
8684 if self.lu.op.iallocator or (self.lu.op.target_node and
8685 self.lu.op.target_node != target_node):
8687 text = "failed over"
8690 raise errors.OpPrereqError("Instances with disk template %s cannot"
8691 " be %s to arbitrary nodes"
8692 " (neither an iallocator nor a target"
8693 " node can be passed)" %
8694 (instance.disk_template, text),
8696 nodeinfo = self.cfg.GetNodeInfo(target_node)
8697 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8698 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8699 group_info)
8700 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8701 ignore=self.ignore_ipolicy)
8703 i_be = cluster.FillBE(instance)
8705 # check memory requirements on the secondary node
8706 if (not self.cleanup and
8707 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8708 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8709 "migrating instance %s" %
8710 instance.name,
8711 i_be[constants.BE_MINMEM],
8712 instance.hypervisor)
8714 self.lu.LogInfo("Not checking memory on the secondary node as"
8715 " instance will not be started")
8717 # check if failover must be forced instead of migration
8718 if (not self.cleanup and not self.failover and
8719 i_be[constants.BE_ALWAYS_FAILOVER]):
8720 self.lu.LogInfo("Instance configured to always failover; fallback"
8722 self.failover = True
8724 # check bridge existence
8725 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8727 if not self.cleanup:
8728 _CheckNodeNotDrained(self.lu, target_node)
8729 if not self.failover:
8730 result = self.rpc.call_instance_migratable(instance.primary_node,
8731 instance)
8732 if result.fail_msg and self.fallback:
8733 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8734 " failover")
8735 self.failover = True
8736 else:
8737 result.Raise("Can't migrate, please use failover",
8738 prereq=True, ecode=errors.ECODE_STATE)
8740 assert not (self.failover and self.cleanup)
8742 if not self.failover:
8743 if self.lu.op.live is not None and self.lu.op.mode is not None:
8744 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8745 " parameters are accepted",
8747 if self.lu.op.live is not None:
8748 if self.lu.op.live:
8749 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8750 else:
8751 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8752 # reset the 'live' parameter to None so that repeated
8753 # invocations of CheckPrereq do not raise an exception
8754 self.lu.op.live = None
8755 elif self.lu.op.mode is None:
8756 # read the default value from the hypervisor
8757 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8758 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8760 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8761 else:
8762 # Failover is never live
8763 self.live = False
8765 if not (self.failover or self.cleanup):
8766 remote_info = self.rpc.call_instance_info(instance.primary_node,
8767 instance.name,
8768 instance.hypervisor)
8769 remote_info.Raise("Error checking instance on node %s" %
8770 instance.primary_node)
8771 instance_running = bool(remote_info.payload)
8772 if instance_running:
8773 self.current_mem = int(remote_info.payload["memory"])
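# current_mem is the memory actually in use by the running instance; it is
# compared against tgt_free_mem in _ExecMigration and, if runtime changes
# are allowed, the instance is ballooned down to fit first.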
8775 def _RunAllocator(self):
8776 """Run the allocator based on input opcode.
8779 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8781 # FIXME: add a self.ignore_ipolicy option
8782 req = iallocator.IAReqRelocate(name=self.instance_name,
8783 relocate_from=[self.instance.primary_node])
8784 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8786 ial.Run(self.lu.op.iallocator)
8789 raise errors.OpPrereqError("Can't compute nodes using"
8790 " iallocator '%s': %s" %
8791 (self.lu.op.iallocator, ial.info),
8792 errors.ECODE_NORES)
8793 self.target_node = ial.result[0]
8794 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8795 self.instance_name, self.lu.op.iallocator,
8796 utils.CommaJoin(ial.result))
8798 def _WaitUntilSync(self):
8799 """Poll with custom rpc for disk sync.
8801 This uses our own step-based rpc call.
8804 self.feedback_fn("* wait until resync is done")
8805 all_done = False
8806 while not all_done:
8807 all_done = True
8808 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8809 self.nodes_ip,
8810 (self.instance.disks,
8811 self.instance))
8812 min_percent = 100
8813 for node, nres in result.items():
8814 nres.Raise("Cannot resync disks on node %s" % node)
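# Each payload is a (done, sync_percent) pair per node; the percentage may
# be None for devices that have already finished syncing.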
8815 node_done, node_percent = nres.payload
8816 all_done = all_done and node_done
8817 if node_percent is not None:
8818 min_percent = min(min_percent, node_percent)
8819 if not all_done:
8820 if min_percent < 100:
8821 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8822 time.sleep(2)
8824 def _EnsureSecondary(self, node):
8825 """Demote a node to secondary.
8828 self.feedback_fn("* switching node %s to secondary mode" % node)
8830 for dev in self.instance.disks:
8831 self.cfg.SetDiskID(dev, node)
8833 result = self.rpc.call_blockdev_close(node, self.instance.name,
8834 self.instance.disks)
8835 result.Raise("Cannot change disk to secondary on node %s" % node)
8837 def _GoStandalone(self):
8838 """Disconnect from the network.
8841 self.feedback_fn("* changing into standalone mode")
8842 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8843 self.instance.disks)
8844 for node, nres in result.items():
8845 nres.Raise("Cannot disconnect disks node %s" % node)
8847 def _GoReconnect(self, multimaster):
8848 """Reconnect to the network.
8854 msg = "single-master"
8855 self.feedback_fn("* changing disks into %s mode" % msg)
8856 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8857 (self.instance.disks, self.instance),
8858 self.instance.name, multimaster)
8859 for node, nres in result.items():
8860 nres.Raise("Cannot change disks config on node %s" % node)
8862 def _ExecCleanup(self):
8863 """Try to cleanup after a failed migration.
8865 The cleanup is done by:
8866 - check that the instance is running only on one node
8867 (and update the config if needed)
8868 - change disks on its secondary node to secondary
8869 - wait until disks are fully synchronized
8870 - disconnect from the network
8871 - change disks into single-master mode
8872 - wait again until disks are fully synchronized
8875 instance = self.instance
8876 target_node = self.target_node
8877 source_node = self.source_node
8879 # check running on only one node
8880 self.feedback_fn("* checking where the instance actually runs"
8881 " (if this hangs, the hypervisor might be in"
8883 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8884 for node, result in ins_l.items():
8885 result.Raise("Can't contact node %s" % node)
8887 runningon_source = instance.name in ins_l[source_node].payload
8888 runningon_target = instance.name in ins_l[target_node].payload
8890 if runningon_source and runningon_target:
8891 raise errors.OpExecError("Instance seems to be running on two nodes,"
8892 " or the hypervisor is confused; you will have"
8893 " to ensure manually that it runs only on one"
8894 " and restart this operation")
8896 if not (runningon_source or runningon_target):
8897 raise errors.OpExecError("Instance does not seem to be running at all;"
8898 " in this case it's safer to repair by"
8899 " running 'gnt-instance stop' to ensure disk"
8900 " shutdown, and then restarting it")
8902 if runningon_target:
8903 # the migration has actually succeeded, we need to update the config
8904 self.feedback_fn("* instance running on secondary node (%s),"
8905 " updating config" % target_node)
8906 instance.primary_node = target_node
8907 self.cfg.Update(instance, self.feedback_fn)
8908 demoted_node = source_node
8910 self.feedback_fn("* instance confirmed to be running on its"
8911 " primary node (%s)" % source_node)
8912 demoted_node = target_node
8914 if instance.disk_template in constants.DTS_INT_MIRROR:
8915 self._EnsureSecondary(demoted_node)
8916 try:
8917 self._WaitUntilSync()
8918 except errors.OpExecError:
8919 # we ignore here errors, since if the device is standalone, it
8920 # won't be able to sync
8921 pass
8922 self._GoStandalone()
8923 self._GoReconnect(False)
8924 self._WaitUntilSync()
8926 self.feedback_fn("* done")
8928 def _RevertDiskStatus(self):
8929 """Try to revert the disk status after a failed migration.
8932 target_node = self.target_node
8933 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8934 return
8936 try:
8937 self._EnsureSecondary(target_node)
8938 self._GoStandalone()
8939 self._GoReconnect(False)
8940 self._WaitUntilSync()
8941 except errors.OpExecError, err:
8942 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8943 " please try to recover the instance manually;"
8944 " error '%s'" % str(err))
8946 def _AbortMigration(self):
8947 """Call the hypervisor code to abort a started migration.
8950 instance = self.instance
8951 target_node = self.target_node
8952 source_node = self.source_node
8953 migration_info = self.migration_info
8955 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8956 instance,
8957 migration_info,
8958 False)
8959 abort_msg = abort_result.fail_msg
8960 if abort_msg:
8961 logging.error("Aborting migration failed on target node %s: %s",
8962 target_node, abort_msg)
8963 # Don't raise an exception here, as we still have to try to revert the
8964 # disk status, even if this step failed.
8966 abort_result = self.rpc.call_instance_finalize_migration_src(
8967 source_node, instance, False, self.live)
8968 abort_msg = abort_result.fail_msg
8970 logging.error("Aborting migration failed on source node %s: %s",
8971 source_node, abort_msg)
8973 def _ExecMigration(self):
8974 """Migrate an instance.
8976 The migrate is done by:
8977 - change the disks into dual-master mode
8978 - wait until disks are fully synchronized again
8979 - migrate the instance
8980 - change disks on the new secondary node (the old primary) to secondary
8981 - wait until disks are fully synchronized
8982 - change disks into single-master mode
8985 instance = self.instance
8986 target_node = self.target_node
8987 source_node = self.source_node
8989 # Check for hypervisor version mismatch and warn the user.
8990 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8991 None, [self.instance.hypervisor], False)
8992 for ninfo in nodeinfo.values():
8993 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8994 ninfo.node)
8995 (_, _, (src_info, )) = nodeinfo[source_node].payload
8996 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8998 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8999 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9000 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9001 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9002 if src_version != dst_version:
9003 self.feedback_fn("* warning: hypervisor version mismatch between"
9004 " source (%s) and target (%s) node" %
9005 (src_version, dst_version))
9007 self.feedback_fn("* checking disk consistency between source and target")
9008 for (idx, dev) in enumerate(instance.disks):
9009 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9010 raise errors.OpExecError("Disk %s is degraded or not fully"
9011 " synchronized on target node,"
9012 " aborting migration" % idx)
9014 if self.current_mem > self.tgt_free_mem:
9015 if not self.allow_runtime_changes:
9016 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9017 " free memory to fit instance %s on target"
9018 " node %s (have %dMB, need %dMB)" %
9019 (instance.name, target_node,
9020 self.tgt_free_mem, self.current_mem))
9021 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9022 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9023 instance,
9024 self.tgt_free_mem)
9025 rpcres.Raise("Cannot modify instance runtime memory")
9027 # First get the migration information from the remote node
9028 result = self.rpc.call_migration_info(source_node, instance)
9029 msg = result.fail_msg
9030 if msg:
9031 log_err = ("Failed fetching source migration information from %s: %s" %
9032 (source_node, msg))
9033 logging.error(log_err)
9034 raise errors.OpExecError(log_err)
9036 self.migration_info = migration_info = result.payload
9038 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9039 # Then switch the disks to master/master mode
9040 self._EnsureSecondary(target_node)
9041 self._GoStandalone()
9042 self._GoReconnect(True)
9043 self._WaitUntilSync()
9045 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9046 result = self.rpc.call_accept_instance(target_node,
9047 instance,
9048 migration_info,
9049 self.nodes_ip[target_node])
9051 msg = result.fail_msg
9053 logging.error("Instance pre-migration failed, trying to revert"
9054 " disk status: %s", msg)
9055 self.feedback_fn("Pre-migration failed, aborting")
9056 self._AbortMigration()
9057 self._RevertDiskStatus()
9058 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9059 (instance.name, msg))
9061 self.feedback_fn("* migrating instance to %s" % target_node)
9062 result = self.rpc.call_instance_migrate(source_node, instance,
9063 self.nodes_ip[target_node],
9064 self.live)
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* starting memory transfer")
9076 last_feedback = time.time()
9077 while True:
9078 result = self.rpc.call_instance_get_migration_status(source_node,
9079 instance)
9080 msg = result.fail_msg
9081 ms = result.payload # MigrationStatus instance
9082 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9083 logging.error("Instance migration failed, trying to revert"
9084 " disk status: %s", msg)
9085 self.feedback_fn("Migration failed, aborting")
9086 self._AbortMigration()
9087 self._RevertDiskStatus()
9089 msg = "hypervisor returned failure"
9090 raise errors.OpExecError("Could not migrate instance %s: %s" %
9091 (instance.name, msg))
9093 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9094 self.feedback_fn("* memory transfer complete")
9097 if (utils.TimeoutExpired(last_feedback,
9098 self._MIGRATION_FEEDBACK_INTERVAL) and
9099 ms.transferred_ram is not None):
9100 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9101 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9102 last_feedback = time.time()
9104 time.sleep(self._MIGRATION_POLL_INTERVAL)
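# The loop above polls the hypervisor every _MIGRATION_POLL_INTERVAL second
# but emits progress at most every _MIGRATION_FEEDBACK_INTERVAL seconds,
# keeping job feedback readable for large memory transfers.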
9106 result = self.rpc.call_instance_finalize_migration_src(source_node,
9107 instance,
9108 True,
9109 self.live)
9110 msg = result.fail_msg
9111 if msg:
9112 logging.error("Instance migration succeeded, but finalization failed"
9113 " on the source node: %s", msg)
9114 raise errors.OpExecError("Could not finalize instance migration: %s" %
9117 instance.primary_node = target_node
9119 # distribute new instance config to the other nodes
9120 self.cfg.Update(instance, self.feedback_fn)
9122 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9123 instance,
9124 migration_info,
9125 True)
9126 msg = result.fail_msg
9127 if msg:
9128 logging.error("Instance migration succeeded, but finalization failed"
9129 " on the target node: %s", msg)
9130 raise errors.OpExecError("Could not finalize instance migration: %s" %
9133 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9134 self._EnsureSecondary(source_node)
9135 self._WaitUntilSync()
9136 self._GoStandalone()
9137 self._GoReconnect(False)
9138 self._WaitUntilSync()
9140 # If the instance's disk template is `rbd' or `ext' and there was a
9141 # successful migration, unmap the device from the source node.
9142 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9143 disks = _ExpandCheckDisks(instance, instance.disks)
9144 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9145 for disk in disks:
9146 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9147 msg = result.fail_msg
9148 if msg:
9149 logging.error("Migration was successful, but couldn't unmap the"
9150 " block device %s on source node %s: %s",
9151 disk.iv_name, source_node, msg)
9152 logging.error("You need to unmap the device %s manually on %s",
9153 disk.iv_name, source_node)
9155 self.feedback_fn("* done")
9157 def _ExecFailover(self):
9158 """Failover an instance.
9160 The failover is done by shutting it down on its present node and
9161 starting it on the secondary.
9164 instance = self.instance
9165 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9167 source_node = instance.primary_node
9168 target_node = self.target_node
9170 if instance.admin_state == constants.ADMINST_UP:
9171 self.feedback_fn("* checking disk consistency between source and target")
9172 for (idx, dev) in enumerate(instance.disks):
9173 # for drbd, these are drbd over lvm
9174 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9175 False):
9176 if primary_node.offline:
9177 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9178 " target node %s" %
9179 (primary_node.name, idx, target_node))
9180 elif not self.ignore_consistency:
9181 raise errors.OpExecError("Disk %s is degraded on target node,"
9182 " aborting failover" % idx)
9184 self.feedback_fn("* not checking disk consistency as instance is not"
9187 self.feedback_fn("* shutting down instance on source node")
9188 logging.info("Shutting down instance %s on node %s",
9189 instance.name, source_node)
9191 result = self.rpc.call_instance_shutdown(source_node, instance,
9192 self.shutdown_timeout)
9193 msg = result.fail_msg
9194 if msg:
9195 if self.ignore_consistency or primary_node.offline:
9196 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9197 " proceeding anyway; please make sure node"
9198 " %s is down; error details: %s",
9199 instance.name, source_node, source_node, msg)
9201 raise errors.OpExecError("Could not shutdown instance %s on"
9203 (instance.name, source_node, msg))
9205 self.feedback_fn("* deactivating the instance's disks on source node")
9206 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9207 raise errors.OpExecError("Can't shut down the instance's disks")
9209 instance.primary_node = target_node
9210 # distribute new instance config to the other nodes
9211 self.cfg.Update(instance, self.feedback_fn)
9213 # Only start the instance if it's marked as up
9214 if instance.admin_state == constants.ADMINST_UP:
9215 self.feedback_fn("* activating the instance's disks on target node %s" %
9217 logging.info("Starting instance %s on node %s",
9218 instance.name, target_node)
9220 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9221 ignore_secondaries=True)
9222 if not disks_ok:
9223 _ShutdownInstanceDisks(self.lu, instance)
9224 raise errors.OpExecError("Can't activate the instance's disks")
9226 self.feedback_fn("* starting the instance on the target node %s" %
9228 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9230 msg = result.fail_msg
9232 _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9234 (instance.name, target_node, msg))
9236 def Exec(self, feedback_fn):
9237 """Perform the migration.
9240 self.feedback_fn = feedback_fn
9241 self.source_node = self.instance.primary_node
9243 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9244 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9245 self.target_node = self.instance.secondary_nodes[0]
9246 # Otherwise self.target_node has been populated either
9247 # directly, or through an iallocator.
9249 self.all_nodes = [self.source_node, self.target_node]
9250 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9251 in self.cfg.GetMultiNodeInfo(self.all_nodes))
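# nodes_ip maps each node name to its secondary (replication) IP; DRBD
# traffic and the migration RPCs above are addressed to that network rather
# than to the primary IP.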
9254 feedback_fn("Failover instance %s" % self.instance.name)
9255 self._ExecFailover()
9257 feedback_fn("Migrating instance %s" % self.instance.name)
9260 return self._ExecCleanup()
9262 return self._ExecMigration()
9265 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9266 force_open):
9267 """Wrapper around L{_CreateBlockDevInner}.
9269 This method annotates the root device first.
9272 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9273 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9274 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9275 force_open, excl_stor)
9278 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9279 info, force_open, excl_stor):
9280 """Create a tree of block devices on a given node.
9282 If this device type has to be created on secondaries, create it and
9285 If not, just recurse to children keeping the same 'force' value.
9287 @attention: The device has to be annotated already.
9289 @param lu: the lu on whose behalf we execute
9290 @param node: the node on which to create the device
9291 @type instance: L{objects.Instance}
9292 @param instance: the instance which owns the device
9293 @type device: L{objects.Disk}
9294 @param device: the device to create
9295 @type force_create: boolean
9296 @param force_create: whether to force creation of this device; this
9297 will be changed to True whenever we find a device which has
9298 the CreateOnSecondary() attribute
9299 @param info: the extra 'metadata' we should attach to the device
9300 (this will be represented as a LVM tag)
9301 @type force_open: boolean
9302 @param force_open: this parameter will be passed to the
9303 L{backend.BlockdevCreate} function where it specifies
9304 whether we run on primary or not, and it affects both
9305 the child assembly and the device's own Open() execution
9306 @type excl_stor: boolean
9307 @param excl_stor: Whether exclusive_storage is active for the node
9310 if device.CreateOnSecondary():
9311 force_create = True
9313 if device.children:
9314 for child in device.children:
9315 _CreateBlockDevInner(lu, node, instance, child, force_create,
9316 info, force_open, excl_stor)
9318 if not force_create:
9319 return
9321 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9322 excl_stor)
9325 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9326 excl_stor):
9327 """Create a single block device on a given node.
9329 This will not recurse over children of the device, so they must be
9332 @param lu: the lu on whose behalf we execute
9333 @param node: the node on which to create the device
9334 @type instance: L{objects.Instance}
9335 @param instance: the instance which owns the device
9336 @type device: L{objects.Disk}
9337 @param device: the device to create
9338 @param info: the extra 'metadata' we should attach to the device
9339 (this will be represented as a LVM tag)
9340 @type force_open: boolean
9341 @param force_open: this parameter will be passed to the
9342 L{backend.BlockdevCreate} function where it specifies
9343 whether we run on primary or not, and it affects both
9344 the child assembly and the device's own Open() execution
9345 @type excl_stor: boolean
9346 @param excl_stor: Whether exclusive_storage is active for the node
9349 lu.cfg.SetDiskID(device, node)
9350 result = lu.rpc.call_blockdev_create(node, device, device.size,
9351 instance.name, force_open, info,
9352 excl_stor)
9353 result.Raise("Can't create block device %s on"
9354 " node %s for instance %s" % (device, node, instance.name))
9355 if device.physical_id is None:
9356 device.physical_id = result.payload
9359 def _GenerateUniqueNames(lu, exts):
9360 """Generate a suitable LV name.
9362 This will generate a logical volume name for the given instance.
9365 results = []
9366 for val in exts:
9367 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9368 results.append("%s%s" % (new_id, val))
9369 return results
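# Example (illustrative; the UUIDs below are made up): a caller doing
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# gets back names such as
#   ["c398e0c8-3b12-41e0-b328-5a4825dd4338.disk0",
#    "f1dd1cfc-6c28-44f0-b4b4-a2fca75e67a8.disk1"]
# which the DRBD code below extends with "_data"/"_meta" suffixes.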
9372 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9373 iv_name, p_minor, s_minor):
9374 """Generate a drbd8 device complete with its children.
9377 assert len(vgnames) == len(names) == 2
9378 port = lu.cfg.AllocatePort()
9379 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9381 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9382 logical_id=(vgnames[0], names[0]),
9383 params={})
9384 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9385 size=constants.DRBD_META_SIZE,
9386 logical_id=(vgnames[1], names[1]),
9387 params={})
9388 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9389 logical_id=(primary, secondary, port,
9390 p_minor, s_minor,
9391 shared_secret),
9392 children=[dev_data, dev_meta],
9393 iv_name=iv_name, params={})
9394 return drbd_dev
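# The resulting device tree (a sketch; sizes, minors and names are whatever
# the caller passed in) looks like:
#
#   drbd_dev (LD_DRBD8, size)
#    |- dev_data (LD_LV, size)            -> (vgnames[0], names[0])
#    `- dev_meta (LD_LV, DRBD_META_SIZE)  -> (vgnames[1], names[1])
#
# i.e. one DRBD8 device whose children are the data LV and the metadata LV.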
9397 _DISK_TEMPLATE_NAME_PREFIX = {
9398 constants.DT_PLAIN: "",
9399 constants.DT_RBD: ".rbd",
9400 constants.DT_EXT: ".ext",
9404 _DISK_TEMPLATE_DEVICE_TYPE = {
9405 constants.DT_PLAIN: constants.LD_LV,
9406 constants.DT_FILE: constants.LD_FILE,
9407 constants.DT_SHARED_FILE: constants.LD_FILE,
9408 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9409 constants.DT_RBD: constants.LD_RBD,
9410 constants.DT_EXT: constants.LD_EXT,
9414 def _GenerateDiskTemplate(
9415 lu, template_name, instance_name, primary_node, secondary_nodes,
9416 disk_info, file_storage_dir, file_driver, base_index,
9417 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9418 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9419 """Generate the entire disk layout for a given template type.
9422 vgname = lu.cfg.GetVGName()
9423 disk_count = len(disk_info)
9424 disks = []
9426 if template_name == constants.DT_DISKLESS:
9427 pass
9428 elif template_name == constants.DT_DRBD8:
9429 if len(secondary_nodes) != 1:
9430 raise errors.ProgrammerError("Wrong template configuration")
9431 remote_node = secondary_nodes[0]
9432 minors = lu.cfg.AllocateDRBDMinor(
9433 [primary_node, remote_node] * len(disk_info), instance_name)
9435 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9436 full_disk_params)
9437 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9439 names = []
9440 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9441 for i in range(disk_count)]):
9442 names.append(lv_prefix + "_data")
9443 names.append(lv_prefix + "_meta")
9444 for idx, disk in enumerate(disk_info):
9445 disk_index = idx + base_index
9446 data_vg = disk.get(constants.IDISK_VG, vgname)
9447 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9448 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9449 disk[constants.IDISK_SIZE],
9451 names[idx * 2:idx * 2 + 2],
9452 "disk/%d" % disk_index,
9453 minors[idx * 2], minors[idx * 2 + 1])
9454 disk_dev.mode = disk[constants.IDISK_MODE]
9455 disks.append(disk_dev)
9456 else:
9457 if secondary_nodes:
9458 raise errors.ProgrammerError("Wrong template configuration")
9460 if template_name == constants.DT_FILE:
9461 _req_file_storage()
9462 elif template_name == constants.DT_SHARED_FILE:
9463 _req_shr_file_storage()
9465 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9466 if name_prefix is None:
9467 names = None
9468 else:
9469 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9470 (name_prefix, base_index + i)
9471 for i in range(disk_count)])
9473 if template_name == constants.DT_PLAIN:
9475 def logical_id_fn(idx, _, disk):
9476 vg = disk.get(constants.IDISK_VG, vgname)
9477 return (vg, names[idx])
9479 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9480 logical_id_fn = \
9481 lambda _, disk_index, disk: (file_driver,
9482 "%s/disk%d" % (file_storage_dir,
9483 disk_index))
9484 elif template_name == constants.DT_BLOCK:
9485 logical_id_fn = \
9486 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9487 disk[constants.IDISK_ADOPT])
9488 elif template_name == constants.DT_RBD:
9489 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9490 elif template_name == constants.DT_EXT:
9491 def logical_id_fn(idx, _, disk):
9492 provider = disk.get(constants.IDISK_PROVIDER, None)
9493 if provider is None:
9494 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9495 " not found", constants.DT_EXT,
9496 constants.IDISK_PROVIDER)
9497 return (provider, names[idx])
9499 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9501 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9503 for idx, disk in enumerate(disk_info):
9504 params = {}
9505 # Only for the Ext template add disk_info to params
9506 if template_name == constants.DT_EXT:
9507 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9508 for key in disk:
9509 if key not in constants.IDISK_PARAMS:
9510 params[key] = disk[key]
9511 disk_index = idx + base_index
9512 size = disk[constants.IDISK_SIZE]
9513 feedback_fn("* disk %s, size %s" %
9514 (disk_index, utils.FormatUnit(size, "h")))
9515 disks.append(objects.Disk(dev_type=dev_type, size=size,
9516 logical_id=logical_id_fn(idx, disk_index, disk),
9517 iv_name="disk/%d" % disk_index,
9518 mode=disk[constants.IDISK_MODE],
9519 params=params))
9521 return disks
9524 def _GetInstanceInfoText(instance):
9525 """Compute that text that should be added to the disk's metadata.
9528 return "originstname+%s" % instance.name
9531 def _CalcEta(time_taken, written, total_size):
9532 """Calculates the ETA based on size written and total size.
9534 @param time_taken: The time taken so far
9535 @param written: amount written so far
9536 @param total_size: The total size of data to be written
9537 @return: The remaining time in seconds
9540 avg_time = time_taken / float(written)
9541 return (total_size - written) * avg_time
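# Worked example (made-up numbers): _CalcEta assumes a constant write rate,
# i.e. remaining_time = (total_size - written) * (time_taken / written).
def _ExampleCalcEtaUsage():
  """Illustrative usage sketch only; not called by the module itself.

  """
  # 512 units written in 30s; 1536 units remain -> 90.0 seconds ETA
  return _CalcEta(30.0, 512, 2048)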
9544 def _WipeDisks(lu, instance, disks=None):
9545 """Wipes instance disks.
9547 @type lu: L{LogicalUnit}
9548 @param lu: the logical unit on whose behalf we execute
9549 @type instance: L{objects.Instance}
9550 @param instance: the instance whose disks we should wipe
9551 @return: the success of the wipe
9554 node = instance.primary_node
9556 if disks is None:
9557 disks = [(idx, disk, 0)
9558 for (idx, disk) in enumerate(instance.disks)]
9560 for (_, device, _) in disks:
9561 lu.cfg.SetDiskID(device, node)
9563 logging.info("Pausing synchronization of disks of instance '%s'",
9565 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9566 (map(compat.snd, disks),
9567 instance),
9568 True)
9569 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9571 for idx, success in enumerate(result.payload):
9573 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9574 " failed", idx, instance.name)
9576 try:
9577 for (idx, device, offset) in disks:
9578 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9579 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9580 wipe_chunk_size = \
9581 int(min(constants.MAX_WIPE_CHUNK,
9582 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9583 size = device.size
9584 last_output = 0
9586 start_time = time.time()
9588 if offset == 0:
9589 info_text = ""
9590 else:
9591 info_text = (" (from %s to %s)" %
9592 (utils.FormatUnit(offset, "h"),
9593 utils.FormatUnit(size, "h")))
9595 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9597 logging.info("Wiping disk %d for instance %s on node %s using"
9598 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9600 while offset < size:
9601 wipe_size = min(wipe_chunk_size, size - offset)
9603 logging.debug("Wiping disk %d, offset %s, chunk %s",
9604 idx, offset, wipe_size)
9606 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9607 wipe_size)
9608 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9609 (idx, offset, wipe_size))
9610 offset += wipe_size
9612 now = time.time()
9613 if now - last_output >= 60:
9614 eta = _CalcEta(now - start_time, offset, size)
9615 lu.LogInfo(" - done: %.1f%% ETA: %s",
9616 offset / float(size) * 100, utils.FormatSeconds(eta))
9617 last_output = now
9619 logging.info("Resuming synchronization of disks for instance '%s'",
9622 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9623 (map(compat.snd, disks),
9624 instance),
9625 False)
9627 if result.fail_msg:
9628 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9629 node, result.fail_msg)
9631 for idx, success in enumerate(result.payload):
9633 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9634 " failed", idx, instance.name)
9637 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9638 """Create all disks for an instance.
9640 This abstracts away some work from AddInstance.
9642 @type lu: L{LogicalUnit}
9643 @param lu: the logical unit on whose behalf we execute
9644 @type instance: L{objects.Instance}
9645 @param instance: the instance whose disks we should create
9647 @param to_skip: list of indices to skip
9648 @type target_node: string
9649 @param target_node: if passed, overrides the target node for creation
9651 @return: the success of the creation
9654 info = _GetInstanceInfoText(instance)
9655 if target_node is None:
9656 pnode = instance.primary_node
9657 all_nodes = instance.all_nodes
9658 else:
9659 pnode = target_node
9660 all_nodes = [pnode]
9662 if instance.disk_template in constants.DTS_FILEBASED:
9663 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9664 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9666 result.Raise("Failed to create directory '%s' on"
9667 " node %s" % (file_storage_dir, pnode))
9669 # Note: this needs to be kept in sync with adding of disks in
9670 # LUInstanceSetParams
9671 for idx, device in enumerate(instance.disks):
9672 if to_skip and idx in to_skip:
9674 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9676 for node in all_nodes:
9677 f_create = node == pnode
9678 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9681 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9682 """Remove all disks for an instance.
9684 This abstracts away some work from `AddInstance()` and
9685 `RemoveInstance()`. Note that in case some of the devices couldn't
9686 be removed, the removal will continue with the other ones (compare
9687 with `_CreateDisks()`).
9689 @type lu: L{LogicalUnit}
9690 @param lu: the logical unit on whose behalf we execute
9691 @type instance: L{objects.Instance}
9692 @param instance: the instance whose disks we should remove
9693 @type target_node: string
9694 @param target_node: used to override the node on which to remove the disks
9696 @return: the success of the removal
9699 logging.info("Removing block devices for instance %s", instance.name)
9701 all_result = True
9702 ports_to_release = set()
9703 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9704 for (idx, device) in enumerate(anno_disks):
9705 if target_node:
9706 edata = [(target_node, device)]
9707 else:
9708 edata = device.ComputeNodeTree(instance.primary_node)
9709 for node, disk in edata:
9710 lu.cfg.SetDiskID(disk, node)
9711 result = lu.rpc.call_blockdev_remove(node, disk)
9713 lu.LogWarning("Could not remove disk %s on node %s,"
9714 " continuing anyway: %s", idx, node, result.fail_msg)
9715 if not (result.offline and node != instance.primary_node):
9716 all_result = False
9718 # if this is a DRBD disk, return its port to the pool
9719 if device.dev_type in constants.LDS_DRBD:
9720 ports_to_release.add(device.logical_id[2])
9722 if all_result or ignore_failures:
9723 for port in ports_to_release:
9724 lu.cfg.AddTcpUdpPort(port)
9726 if instance.disk_template in constants.DTS_FILEBASED:
9727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9728 if target_node:
9729 tgt = target_node
9730 else:
9731 tgt = instance.primary_node
9732 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9734 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9735 file_storage_dir, instance.primary_node, result.fail_msg)
9741 def _ComputeDiskSizePerVG(disk_template, disks):
9742 """Compute disk size requirements in the volume group
9745 def _compute(disks, payload):
9746 """Universal algorithm.
9748 """
9749 vgs = {}
9750 for disk in disks:
9751 vgs[disk[constants.IDISK_VG]] = \
9752 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9753 return vgs
9756 # Required free disk space as a function of disk and swap space
9757 req_size_dict = {
9758 constants.DT_DISKLESS: {},
9759 constants.DT_PLAIN: _compute(disks, 0),
9760 # 128 MB are added for drbd metadata for each disk
9761 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9762 constants.DT_FILE: {},
9763 constants.DT_SHARED_FILE: {},
9764 }
9766 if disk_template not in req_size_dict:
9767 raise errors.ProgrammerError("Disk template '%s' size requirement"
9768 " is unknown" % disk_template)
9770 return req_size_dict[disk_template]
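# Illustrative sketch (hypothetical disk specs): for two DRBD8 disks in the
# volume group "xenvg", the required space is the sum of the disk sizes plus
# DRBD_META_SIZE per disk, keyed by VG name.
def _ExampleComputeDiskSizePerVG():
  """Illustrative sketch only; not called by the module itself.

  """
  disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
           {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
  # -> {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)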
9773 def _FilterVmNodes(lu, nodenames):
9774 """Filters out non-vm_capable nodes from a list.
9776 @type lu: L{LogicalUnit}
9777 @param lu: the logical unit for which we check
9778 @type nodenames: list
9779 @param nodenames: the list of nodes on which we should check
9781 @return: the list of vm-capable nodes
9784 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9785 return [name for name in nodenames if name not in vm_nodes]
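# Example (hypothetical node names): if "node3" is marked vm_capable=False
# in the cluster configuration, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node2"]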
9788 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9789 """Hypervisor parameter validation.
9791 This function abstracts the hypervisor parameter validation to be
9792 used in both instance create and instance modify.
9794 @type lu: L{LogicalUnit}
9795 @param lu: the logical unit for which we check
9796 @type nodenames: list
9797 @param nodenames: the list of nodes on which we should check
9798 @type hvname: string
9799 @param hvname: the name of the hypervisor we should use
9800 @type hvparams: dict
9801 @param hvparams: the parameters which we need to check
9802 @raise errors.OpPrereqError: if the parameters are not valid
9805 nodenames = _FilterVmNodes(lu, nodenames)
9807 cluster = lu.cfg.GetClusterInfo()
9808 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9810 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9811 for node in nodenames:
9812 info = hvinfo[node]
9813 if info.offline:
9814 continue
9815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9818 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9819 """OS parameters validation.
9821 @type lu: L{LogicalUnit}
9822 @param lu: the logical unit for which we check
9823 @type required: boolean
9824 @param required: whether the validation should fail if the OS is not found
9826 @type nodenames: list
9827 @param nodenames: the list of nodes on which we should check
9828 @type osname: string
9829 @param osname: the name of the OS we should use
9830 @type osparams: dict
9831 @param osparams: the parameters which we need to check
9832 @raise errors.OpPrereqError: if the parameters are not valid
9835 nodenames = _FilterVmNodes(lu, nodenames)
9836 result = lu.rpc.call_os_validate(nodenames, required, osname,
9837 [constants.OS_VALIDATE_PARAMETERS],
9838 osparams)
9839 for node, nres in result.items():
9840 # we don't check for offline cases since this should be run only
9841 # against the master node and/or an instance's nodes
9842 nres.Raise("OS Parameters validation failed on node %s" % node)
9843 if not nres.payload:
9844 lu.LogInfo("OS %s not found on node %s, validation skipped",
9848 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9849 """Wrapper around IAReqInstanceAlloc.
9851 @param op: The instance opcode
9852 @param disks: The computed disks
9853 @param nics: The computed nics
9854 @param beparams: The full filled beparams
9855 @param node_whitelist: List of nodes which should appear as online to the
9856 allocator (unless the node is already marked offline)
9858 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9861 spindle_use = beparams[constants.BE_SPINDLE_USE]
9862 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9863 disk_template=op.disk_template,
9864 tags=op.tags,
9865 os=op.os_type,
9866 vcpus=beparams[constants.BE_VCPUS],
9867 memory=beparams[constants.BE_MAXMEM],
9868 spindle_use=spindle_use,
9869 disks=disks,
9870 nics=[n.ToDict() for n in nics],
9871 hypervisor=op.hypervisor,
9872 node_whitelist=node_whitelist)
9875 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9876 """Computes the nics.
9878 @param op: The instance opcode
9879 @param cluster: Cluster configuration object
9880 @param default_ip: The default ip to assign
9881 @param cfg: An instance of the configuration object
9882 @param ec_id: Execution context ID
9884 @returns: The built up NICs
9887 nics = []
9888 for nic in op.nics:
9889 nic_mode_req = nic.get(constants.INIC_MODE, None)
9890 nic_mode = nic_mode_req
9891 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9892 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9894 net = nic.get(constants.INIC_NETWORK, None)
9895 link = nic.get(constants.NIC_LINK, None)
9896 ip = nic.get(constants.INIC_IP, None)
9898 if net is None or net.lower() == constants.VALUE_NONE:
9899 net = None
9900 else:
9901 if nic_mode_req is not None or link is not None:
9902 raise errors.OpPrereqError("If network is given, no mode or link"
9903 " is allowed to be passed",
9906 # ip validity checks
9907 if ip is None or ip.lower() == constants.VALUE_NONE:
9908 nic_ip = None
9909 elif ip.lower() == constants.VALUE_AUTO:
9910 if not op.name_check:
9911 raise errors.OpPrereqError("IP address set to auto but name checks"
9912 " have been skipped",
9916 # We defer pool operations until later, so that the iallocator has
9917 # filled in the instance's node(s)
9918 if ip.lower() == constants.NIC_IP_POOL:
9919 if net is None:
9920 raise errors.OpPrereqError("if ip=pool, parameter network"
9921 " must be passed too",
9924 elif not netutils.IPAddress.IsValid(ip):
9925 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9930 # TODO: check the ip address for uniqueness
9931 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9932 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9935 # MAC address verification
9936 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9937 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9938 mac = utils.NormalizeAndValidateMac(mac)
9940 try:
9941 # TODO: We need to factor this out
9942 cfg.ReserveMAC(mac, ec_id)
9943 except errors.ReservationError:
9944 raise errors.OpPrereqError("MAC address %s already in use"
9945 " in cluster" % mac,
9946 errors.ECODE_NOTUNIQUE)
9948 # Build nic parameters
9949 nicparams = {}
9950 if nic_mode_req:
9951 nicparams[constants.NIC_MODE] = nic_mode
9952 if link:
9953 nicparams[constants.NIC_LINK] = link
9955 check_params = cluster.SimpleFillNIC(nicparams)
9956 objects.NIC.CheckParameterSyntax(check_params)
9957 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9958 network=net, nicparams=nicparams))
9960 return nics
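# Illustrative sketch (hypothetical opcode values): a nic spec of
#   {constants.INIC_IP: "auto", constants.INIC_MAC: constants.VALUE_AUTO}
# with name checking enabled resolves the IP from the name lookup, leaves
# the MAC as "auto" for later generation and fills mode/link from the
# cluster's default nicparams.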
9963 def _ComputeDisks(op, default_vg):
9964 """Computes the instance disks.
9966 @param op: The instance opcode
9967 @param default_vg: The default_vg to assume
9969 @return: The computed disks
9973 for disk in op.disks:
9974 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9975 if mode not in constants.DISK_ACCESS_SET:
9976 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9977 mode, errors.ECODE_INVAL)
9978 size = disk.get(constants.IDISK_SIZE, None)
9979 if size is None:
9980 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9981 try:
9982 size = int(size)
9983 except (TypeError, ValueError):
9984 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9985 errors.ECODE_INVAL)
9987 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9988 if ext_provider and op.disk_template != constants.DT_EXT:
9989 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9990 " disk template, not %s" %
9991 (constants.IDISK_PROVIDER, constants.DT_EXT,
9992 op.disk_template), errors.ECODE_INVAL)
9994 data_vg = disk.get(constants.IDISK_VG, default_vg)
9995 new_disk = {
9996 constants.IDISK_SIZE: size,
9997 constants.IDISK_MODE: mode,
9998 constants.IDISK_VG: data_vg,
9999 }
10001 if constants.IDISK_METAVG in disk:
10002 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10003 if constants.IDISK_ADOPT in disk:
10004 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10006 # For extstorage, demand the `provider' option and add any
10007 # additional parameters (ext-params) to the dict
10008 if op.disk_template == constants.DT_EXT:
10009 if ext_provider:
10010 new_disk[constants.IDISK_PROVIDER] = ext_provider
10011 for key in disk:
10012 if key not in constants.IDISK_PARAMS:
10013 new_disk[key] = disk[key]
10014 else:
10015 raise errors.OpPrereqError("Missing provider for template '%s'" %
10016 constants.DT_EXT, errors.ECODE_INVAL)
10018 disks.append(new_disk)
10020 return disks
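# Illustrative sketch (hypothetical opcode): for an op carrying
#   disks=[{constants.IDISK_SIZE: 1024}]
# and default_vg="xenvg", the result is
#   [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]
# i.e. a missing access mode and VG are filled in from the defaults.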
10023 def _ComputeFullBeParams(op, cluster):
10024 """Computes the full beparams.
10026 @param op: The instance opcode
10027 @param cluster: The cluster config object
10029 @return: The fully filled beparams
10032 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10033 for param, value in op.beparams.iteritems():
10034 if value == constants.VALUE_AUTO:
10035 op.beparams[param] = default_beparams[param]
10036 objects.UpgradeBeParams(op.beparams)
10037 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10038 return cluster.SimpleFillBE(op.beparams)
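# Illustrative sketch (hypothetical values): with op.beparams set to
#   {constants.BE_VCPUS: constants.VALUE_AUTO}
# the "auto" value is first replaced by the cluster default, the dict is
# upgraded and type-checked, and SimpleFillBE then merges in the remaining
# cluster-level defaults to produce the fully filled beparams.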
10041 def _CheckOpportunisticLocking(op):
10042 """Generate error if opportunistic locking is not possible.
10045 if op.opportunistic_locking and not op.iallocator:
10046 raise errors.OpPrereqError("Opportunistic locking is only available in"
10047 " combination with an instance allocator",
10048 errors.ECODE_INVAL)
10051 class LUInstanceCreate(LogicalUnit):
10052 """Create an instance.
10055 HPATH = "instance-add"
10056 HTYPE = constants.HTYPE_INSTANCE
10059 def CheckArguments(self):
10060 """Check arguments.
10063 # do not require name_check to ease forward/backward compatibility
10065 if self.op.no_install and self.op.start:
10066 self.LogInfo("No-installation mode selected, disabling startup")
10067 self.op.start = False
10068 # validate/normalize the instance name
10069 self.op.instance_name = \
10070 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10072 if self.op.ip_check and not self.op.name_check:
10073 # TODO: make the ip check more flexible and not depend on the name check
10074 raise errors.OpPrereqError("Cannot do IP address check without a name"
10075 " check", errors.ECODE_INVAL)
10077 # check nics' parameter names
10078 for nic in self.op.nics:
10079 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10081 # check disks. parameter names and consistent adopt/no-adopt strategy
10082 has_adopt = has_no_adopt = False
10083 for disk in self.op.disks:
10084 if self.op.disk_template != constants.DT_EXT:
10085 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10086 if constants.IDISK_ADOPT in disk:
10087 has_adopt = True
10088 else:
10089 has_no_adopt = True
10090 if has_adopt and has_no_adopt:
10091 raise errors.OpPrereqError("Either all disks are adopted or none is",
10092 errors.ECODE_INVAL)
10093 if has_adopt:
10094 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10095 raise errors.OpPrereqError("Disk adoption is not supported for the"
10096 " '%s' disk template" %
10097 self.op.disk_template,
10098 errors.ECODE_INVAL)
10099 if self.op.iallocator is not None:
10100 raise errors.OpPrereqError("Disk adoption not allowed with an"
10101 " iallocator script", errors.ECODE_INVAL)
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 raise errors.OpPrereqError("Disk adoption not allowed for"
10104 " instance import", errors.ECODE_INVAL)
10105 else:
10106 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10107 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10108 " but no 'adopt' parameter given" %
10109 self.op.disk_template,
10110 errors.ECODE_INVAL)
10112 self.adopt_disks = has_adopt
10114 # instance name verification
10115 if self.op.name_check:
10116 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10117 self.op.instance_name = self.hostname1.name
10118 # used in CheckPrereq for ip ping check
10119 self.check_ip = self.hostname1.ip
10121 self.check_ip = None
10123 # file storage checks
10124 if (self.op.file_driver and
10125 self.op.file_driver not in constants.FILE_DRIVER):
10126 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10127 self.op.file_driver, errors.ECODE_INVAL)
10129 if self.op.disk_template == constants.DT_FILE:
10130 opcodes.RequireFileStorage()
10131 elif self.op.disk_template == constants.DT_SHARED_FILE:
10132 opcodes.RequireSharedFileStorage()
10134 ### Node/iallocator related checks
10135 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10137 if self.op.pnode is not None:
10138 if self.op.disk_template in constants.DTS_INT_MIRROR:
10139 if self.op.snode is None:
10140 raise errors.OpPrereqError("The networked disk templates need"
10141 " a mirror node", errors.ECODE_INVAL)
10142 elif self.op.snode:
10143 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10145 self.op.snode = None
10147 _CheckOpportunisticLocking(self.op)
10149 self._cds = _GetClusterDomainSecret()
10151 if self.op.mode == constants.INSTANCE_IMPORT:
10152 # On import force_variant must be True, because if we forced it at
10153 # initial install, our only chance when importing it back is that it
10154 # works again
10155 self.op.force_variant = True
10157 if self.op.no_install:
10158 self.LogInfo("No-installation mode has no effect during import")
10160 elif self.op.mode == constants.INSTANCE_CREATE:
10161 if self.op.os_type is None:
10162 raise errors.OpPrereqError("No guest OS specified",
10163 errors.ECODE_INVAL)
10164 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10165 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10166 " installation" % self.op.os_type,
10167 errors.ECODE_STATE)
10168 if self.op.disk_template is None:
10169 raise errors.OpPrereqError("No disk template specified",
10170 errors.ECODE_INVAL)
10172 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10173 # Check handshake to ensure both clusters have the same domain secret
10174 src_handshake = self.op.source_handshake
10175 if not src_handshake:
10176 raise errors.OpPrereqError("Missing source handshake",
10177 errors.ECODE_INVAL)
10179 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10180 src_handshake)
10181 if errmsg:
10182 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10183 errors.ECODE_INVAL)
10185 # Load and check source CA
10186 self.source_x509_ca_pem = self.op.source_x509_ca
10187 if not self.source_x509_ca_pem:
10188 raise errors.OpPrereqError("Missing source X509 CA",
10189 errors.ECODE_INVAL)
10191 try:
10192 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10193 self._cds)
10194 except OpenSSL.crypto.Error, err:
10195 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10196 (err, ), errors.ECODE_INVAL)
10198 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10199 if errcode is not None:
10200 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10201 errors.ECODE_INVAL)
10203 self.source_x509_ca = cert
10205 src_instance_name = self.op.source_instance_name
10206 if not src_instance_name:
10207 raise errors.OpPrereqError("Missing source instance name",
10208 errors.ECODE_INVAL)
10210 self.source_instance_name = \
10211 netutils.GetHostname(name=src_instance_name).name
10214 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10215 self.op.mode, errors.ECODE_INVAL)
10217 def ExpandNames(self):
10218 """ExpandNames for CreateInstance.
10220 Figure out the right locks for instance creation.
10223 self.needed_locks = {}
10225 instance_name = self.op.instance_name
10226 # this is just a preventive check, but someone might still add this
10227 # instance in the meantime, and creation will fail at lock-add time
10228 if instance_name in self.cfg.GetInstanceList():
10229 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10230 instance_name, errors.ECODE_EXISTS)
10232 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10234 if self.op.iallocator:
10235 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10236 # specifying a group on instance creation and then selecting nodes from
10238 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10239 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10241 if self.op.opportunistic_locking:
10242 self.opportunistic_locks[locking.LEVEL_NODE] = True
10243 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10245 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10246 nodelist = [self.op.pnode]
10247 if self.op.snode is not None:
10248 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10249 nodelist.append(self.op.snode)
10250 self.needed_locks[locking.LEVEL_NODE] = nodelist
10252 # in case of import lock the source node too
10253 if self.op.mode == constants.INSTANCE_IMPORT:
10254 src_node = self.op.src_node
10255 src_path = self.op.src_path
10257 if src_path is None:
10258 self.op.src_path = src_path = self.op.instance_name
10260 if src_node is None:
10261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10262 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10263 self.op.src_node = None
10264 if os.path.isabs(src_path):
10265 raise errors.OpPrereqError("Importing an instance from a path"
10266 " requires a source node option",
10267 errors.ECODE_INVAL)
10268 else:
10269 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10270 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10271 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10272 if not os.path.isabs(src_path):
10273 self.op.src_path = src_path = \
10274 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10276 self.needed_locks[locking.LEVEL_NODE_RES] = \
10277 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10279 def _RunAllocator(self):
10280 """Run the allocator based on input opcode.
10283 if self.op.opportunistic_locking:
10284 # Only consider nodes for which a lock is held
10285 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10287 node_whitelist = None
10289 #TODO Export network to iallocator so that it chooses a pnode
10290 # in a nodegroup that has the desired network connected to
10291 req = _CreateInstanceAllocRequest(self.op, self.disks,
10292 self.nics, self.be_full,
10294 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10296 ial.Run(self.op.iallocator)
10298 if not ial.success:
10299 # When opportunistic locks are used only a temporary failure is generated
10300 if self.op.opportunistic_locking:
10301 ecode = errors.ECODE_TEMP_NORES
10303 ecode = errors.ECODE_NORES
10305 raise errors.OpPrereqError("Can't compute nodes using"
10306 " iallocator '%s': %s" %
10307 (self.op.iallocator, ial.info),
10310 self.op.pnode = ial.result[0]
10311 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10312 self.op.instance_name, self.op.iallocator,
10313 utils.CommaJoin(ial.result))
10315 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10317 if req.RequiredNodes() == 2:
10318 self.op.snode = ial.result[1]
10320 def BuildHooksEnv(self):
10321 """Build hooks env.
10323 This runs on master, primary and secondary nodes of the instance.
10327 "ADD_MODE": self.op.mode,
10329 if self.op.mode == constants.INSTANCE_IMPORT:
10330 env["SRC_NODE"] = self.op.src_node
10331 env["SRC_PATH"] = self.op.src_path
10332 env["SRC_IMAGES"] = self.src_images
10334 env.update(_BuildInstanceHookEnv(
10335 name=self.op.instance_name,
10336 primary_node=self.op.pnode,
10337 secondary_nodes=self.secondaries,
10338 status=self.op.start,
10339 os_type=self.op.os_type,
10340 minmem=self.be_full[constants.BE_MINMEM],
10341 maxmem=self.be_full[constants.BE_MAXMEM],
10342 vcpus=self.be_full[constants.BE_VCPUS],
10343 nics=_NICListToTuple(self, self.nics),
10344 disk_template=self.op.disk_template,
10345 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10346 for d in self.disks],
10349 hypervisor_name=self.op.hypervisor,
10355 def BuildHooksNodes(self):
10356 """Build hooks nodes.
10359 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10360 return (nl, nl)
10362 def _ReadExportInfo(self):
10363 """Reads the export information from disk.
10365 It will override the opcode source node and path with the actual
10366 information, if these two were not specified before.
10368 @return: the export information
10371 assert self.op.mode == constants.INSTANCE_IMPORT
10373 src_node = self.op.src_node
10374 src_path = self.op.src_path
10376 if src_node is None:
10377 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10378 exp_list = self.rpc.call_export_list(locked_nodes)
10379 found = False
10380 for node in exp_list:
10381 if exp_list[node].fail_msg:
10382 continue
10383 if src_path in exp_list[node].payload:
10384 found = True
10385 self.op.src_node = src_node = node
10386 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10387 src_path)
10388 break
10389 if not found:
10390 raise errors.OpPrereqError("No export found for relative path %s" %
10391 src_path, errors.ECODE_INVAL)
10393 _CheckNodeOnline(self, src_node)
10394 result = self.rpc.call_export_info(src_node, src_path)
10395 result.Raise("No export or invalid export found in dir %s" % src_path)
10397 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10398 if not export_info.has_section(constants.INISECT_EXP):
10399 raise errors.ProgrammerError("Corrupted export config",
10400 errors.ECODE_ENVIRON)
10402 ei_version = export_info.get(constants.INISECT_EXP, "version")
10403 if int(ei_version) != constants.EXPORT_VERSION:
10404 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10405 (ei_version, constants.EXPORT_VERSION),
10406 errors.ECODE_ENVIRON)
10409 def _ReadExportParams(self, einfo):
10410 """Use export parameters as defaults.
10412 In case the opcode doesn't specify (as in override) some instance
10413 parameters, then try to use them from the export information, if
10414 that declares them.
10417 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10419 if self.op.disk_template is None:
10420 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10421 self.op.disk_template = einfo.get(constants.INISECT_INS,
10422 "disk_template")
10423 if self.op.disk_template not in constants.DISK_TEMPLATES:
10424 raise errors.OpPrereqError("Disk template specified in configuration"
10425 " file is not one of the allowed values:"
10427 " ".join(constants.DISK_TEMPLATES),
10428 errors.ECODE_INVAL)
10430 raise errors.OpPrereqError("No disk template specified and the export"
10431 " is missing the disk_template information",
10432 errors.ECODE_INVAL)
10434 if not self.op.disks:
10435 disks = []
10436 # TODO: import the disk iv_name too
10437 for idx in range(constants.MAX_DISKS):
10438 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10439 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10440 disks.append({constants.IDISK_SIZE: disk_sz})
10441 self.op.disks = disks
10442 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10443 raise errors.OpPrereqError("No disk info specified and the export"
10444 " is missing the disk information",
10445 errors.ECODE_INVAL)
10447 if not self.op.nics:
10448 nics = []
10449 for idx in range(constants.MAX_NICS):
10450 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10451 ndict = {}
10452 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10453 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10454 ndict[name] = v
10455 nics.append(ndict)
10456 else:
10457 break
10458 self.op.nics = nics
10460 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10461 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10463 if (self.op.hypervisor is None and
10464 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10465 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10467 if einfo.has_section(constants.INISECT_HYP):
10468 # use the export parameters but do not override the ones
10469 # specified by the user
10470 for name, value in einfo.items(constants.INISECT_HYP):
10471 if name not in self.op.hvparams:
10472 self.op.hvparams[name] = value
10474 if einfo.has_section(constants.INISECT_BEP):
10475 # use the parameters, without overriding
10476 for name, value in einfo.items(constants.INISECT_BEP):
10477 if name not in self.op.beparams:
10478 self.op.beparams[name] = value
10479 # Compatibility for the old "memory" be param
10480 if name == constants.BE_MEMORY:
10481 if constants.BE_MAXMEM not in self.op.beparams:
10482 self.op.beparams[constants.BE_MAXMEM] = value
10483 if constants.BE_MINMEM not in self.op.beparams:
10484 self.op.beparams[constants.BE_MINMEM] = value
10486 # try to read the parameters old style, from the main section
10487 for name in constants.BES_PARAMETERS:
10488 if (name not in self.op.beparams and
10489 einfo.has_option(constants.INISECT_INS, name)):
10490 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10492 if einfo.has_section(constants.INISECT_OSP):
10493 # use the parameters, without overriding
10494 for name, value in einfo.items(constants.INISECT_OSP):
10495 if name not in self.op.osparams:
10496 self.op.osparams[name] = value
10498 def _RevertToDefaults(self, cluster):
10499 """Revert the instance parameters to the default values.
10503 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10504 for name in self.op.hvparams.keys():
10505 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10506 del self.op.hvparams[name]
10508 be_defs = cluster.SimpleFillBE({})
10509 for name in self.op.beparams.keys():
10510 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10511 del self.op.beparams[name]
10513 nic_defs = cluster.SimpleFillNIC({})
10514 for nic in self.op.nics:
10515 for name in constants.NICS_PARAMETERS:
10516 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10517 del nic[name]
10519 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10520 for name in self.op.osparams.keys():
10521 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10522 del self.op.osparams[name]
10524 def _CalculateFileStorageDir(self):
10525 """Calculate final instance file storage dir.
10528 # file storage dir calculation/check
10529 self.instance_file_storage_dir = None
10530 if self.op.disk_template in constants.DTS_FILEBASED:
10531 # build the full file storage dir path
10532 joinargs = []
10534 if self.op.disk_template == constants.DT_SHARED_FILE:
10535 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10536 else:
10537 get_fsd_fn = self.cfg.GetFileStorageDir
10539 cfg_storagedir = get_fsd_fn()
10540 if not cfg_storagedir:
10541 raise errors.OpPrereqError("Cluster file storage dir not defined",
10542 errors.ECODE_STATE)
10543 joinargs.append(cfg_storagedir)
10545 if self.op.file_storage_dir is not None:
10546 joinargs.append(self.op.file_storage_dir)
10548 joinargs.append(self.op.instance_name)
10550 # pylint: disable=W0142
10551 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10553 def CheckPrereq(self): # pylint: disable=R0914
10554 """Check prerequisites.
10557 self._CalculateFileStorageDir()
10559 if self.op.mode == constants.INSTANCE_IMPORT:
10560 export_info = self._ReadExportInfo()
10561 self._ReadExportParams(export_info)
10562 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10564 self._old_instance_name = None
10566 if (not self.cfg.GetVGName() and
10567 self.op.disk_template not in constants.DTS_NOT_LVM):
10568 raise errors.OpPrereqError("Cluster does not support lvm-based"
10569 " instances", errors.ECODE_STATE)
10571 if (self.op.hypervisor is None or
10572 self.op.hypervisor == constants.VALUE_AUTO):
10573 self.op.hypervisor = self.cfg.GetHypervisorType()
10575 cluster = self.cfg.GetClusterInfo()
10576 enabled_hvs = cluster.enabled_hypervisors
10577 if self.op.hypervisor not in enabled_hvs:
10578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10580 (self.op.hypervisor, ",".join(enabled_hvs)),
10581 errors.ECODE_STATE)
10583 # Check tag validity
10584 for tag in self.op.tags:
10585 objects.TaggableObject.ValidateTag(tag)
10587 # check hypervisor parameter syntax (locally)
10588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10591 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10592 hv_type.CheckParameterSyntax(filled_hvp)
10593 self.hv_full = filled_hvp
10594 # check that we don't specify global parameters on an instance
10595 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10596 "instance", "cluster")
10598 # fill and remember the beparams dict
10599 self.be_full = _ComputeFullBeParams(self.op, cluster)
10601 # build os parameters
10602 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10604 # now that hvp/bep are in final format, let's reset to defaults,
10605 # if told to do so
10606 if self.op.identify_defaults:
10607 self._RevertToDefaults(cluster)
10610 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10611 self.proc.GetECId())
10613 # disk checks/pre-build
10614 default_vg = self.cfg.GetVGName()
10615 self.disks = _ComputeDisks(self.op, default_vg)
10617 if self.op.mode == constants.INSTANCE_IMPORT:
10618 disk_images = []
10619 for idx in range(len(self.disks)):
10620 option = "disk%d_dump" % idx
10621 if export_info.has_option(constants.INISECT_INS, option):
10622 # FIXME: are the old os-es, disk sizes, etc. useful?
10623 export_name = export_info.get(constants.INISECT_INS, option)
10624 image = utils.PathJoin(self.op.src_path, export_name)
10625 disk_images.append(image)
10626 else:
10627 disk_images.append(False)
10629 self.src_images = disk_images
10631 if self.op.instance_name == self._old_instance_name:
10632 for idx, nic in enumerate(self.nics):
10633 if nic.mac == constants.VALUE_AUTO:
10634 nic_mac_ini = "nic%d_mac" % idx
10635 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10637 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10639 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10640 if self.op.ip_check:
10641 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10642 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10643 (self.check_ip, self.op.instance_name),
10644 errors.ECODE_NOTUNIQUE)
10646 #### mac address generation
10647 # By generating the MAC address here, both the allocator and the hooks get
10648 # the real final MAC address rather than the 'auto' or 'generate' value.
10649 # There is a race condition between the generation and the instance object
10650 # creation, which means that we know the mac is valid now, but we're not
10651 # sure it will be when we actually add the instance. If things go bad
10652 # adding the instance will abort because of a duplicate mac, and the
10653 # creation job will fail.
10654 for nic in self.nics:
10655 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10656 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10660 if self.op.iallocator is not None:
10661 self._RunAllocator()
10663 # Release all unneeded node locks
10664 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10665 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10666 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10667 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10669 assert (self.owned_locks(locking.LEVEL_NODE) ==
10670 self.owned_locks(locking.LEVEL_NODE_RES)), \
10671 "Node locks differ from node resource locks"
10673 #### node related checks
10675 # check primary node
10676 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10677 assert self.pnode is not None, \
10678 "Cannot retrieve locked node %s" % self.op.pnode
10680 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10681 pnode.name, errors.ECODE_STATE)
10683 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10684 pnode.name, errors.ECODE_STATE)
10685 if not pnode.vm_capable:
10686 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10687 " '%s'" % pnode.name, errors.ECODE_STATE)
10689 self.secondaries = []
10691 # Fill in any IPs from IP pools. This must happen here, because we need to
10692 # know the nic's primary node, as specified by the iallocator
10693 for idx, nic in enumerate(self.nics):
10694 net = nic.network
10695 if net is not None:
10696 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10697 if netparams is None:
10698 raise errors.OpPrereqError("No netparams found for network"
10699 " %s. Propably not connected to"
10700 " node's %s nodegroup" %
10701 (net, self.pnode.name),
10702 errors.ECODE_INVAL)
10703 self.LogInfo("NIC/%d inherits netparams %s" %
10704 (idx, netparams.values()))
10705 nic.nicparams = dict(netparams)
10706 if nic.ip is not None:
10707 if nic.ip.lower() == constants.NIC_IP_POOL:
10708 try:
10709 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10710 except errors.ReservationError:
10711 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10712 " from the address pool" % idx,
10713 errors.ECODE_STATE)
10714 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10715 else:
10716 try:
10717 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10718 except errors.ReservationError:
10719 raise errors.OpPrereqError("IP address %s already in use"
10720 " or does not belong to network %s" %
10721 (nic.ip, net),
10722 errors.ECODE_NOTUNIQUE)
10724 # net is None, ip None or given
10725 elif self.op.conflicts_check:
10726 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10728 # mirror node verification
10729 if self.op.disk_template in constants.DTS_INT_MIRROR:
10730 if self.op.snode == pnode.name:
10731 raise errors.OpPrereqError("The secondary node cannot be the"
10732 " primary node", errors.ECODE_INVAL)
10733 _CheckNodeOnline(self, self.op.snode)
10734 _CheckNodeNotDrained(self, self.op.snode)
10735 _CheckNodeVmCapable(self, self.op.snode)
10736 self.secondaries.append(self.op.snode)
10738 snode = self.cfg.GetNodeInfo(self.op.snode)
10739 if pnode.group != snode.group:
10740 self.LogWarning("The primary and secondary nodes are in two"
10741 " different node groups; the disk parameters"
10742 " from the first disk's node group will be"
10745 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10747 if self.op.disk_template in constants.DTS_INT_MIRROR:
10748 nodes.append(snode)
10749 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10750 if compat.any(map(has_es, nodes)):
10751 raise errors.OpPrereqError("Disk template %s not supported with"
10752 " exclusive storage" % self.op.disk_template,
10753 errors.ECODE_STATE)
10755 nodenames = [pnode.name] + self.secondaries
10757 # Verify instance specs
10758 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10760 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10761 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10762 constants.ISPEC_DISK_COUNT: len(self.disks),
10763 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10764 constants.ISPEC_NIC_COUNT: len(self.nics),
10765 constants.ISPEC_SPINDLE_USE: spindle_use,
10768 group_info = self.cfg.GetNodeGroup(pnode.group)
10769 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10770 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10771 if not self.op.ignore_ipolicy and res:
10772 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10773 (pnode.group, group_info.name, utils.CommaJoin(res)))
10774 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10776 if not self.adopt_disks:
10777 if self.op.disk_template == constants.DT_RBD:
10778 # _CheckRADOSFreeSpace() is just a placeholder.
10779 # Any function that checks prerequisites can be placed here.
10780 # Check if there is enough space on the RADOS cluster.
10781 _CheckRADOSFreeSpace()
10782 elif self.op.disk_template == constants.DT_EXT:
10783 # FIXME: Function that checks prereqs if needed
10784 pass
10785 else:
10786 # Check lv size requirements, if not adopting
10787 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10788 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10790 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10791 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10792 disk[constants.IDISK_ADOPT])
10793 for disk in self.disks])
10794 if len(all_lvs) != len(self.disks):
10795 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10796 errors.ECODE_INVAL)
10797 for lv_name in all_lvs:
10798 try:
10799 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10800 # to ReserveLV uses the same syntax
10801 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10802 except errors.ReservationError:
10803 raise errors.OpPrereqError("LV named %s used by another instance" %
10804 lv_name, errors.ECODE_NOTUNIQUE)
10806 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10807 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10809 node_lvs = self.rpc.call_lv_list([pnode.name],
10810 vg_names.payload.keys())[pnode.name]
10811 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10812 node_lvs = node_lvs.payload
10814 delta = all_lvs.difference(node_lvs.keys())
10816 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10817 utils.CommaJoin(delta),
10818 errors.ECODE_INVAL)
10819 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10821 raise errors.OpPrereqError("Online logical volumes found, cannot"
10822 " adopt: %s" % utils.CommaJoin(online_lvs),
10823 errors.ECODE_STATE)
10824 # update the size of disk based on what is found
10825 for dsk in self.disks:
10826 dsk[constants.IDISK_SIZE] = \
10827 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10828 dsk[constants.IDISK_ADOPT])][0]))
10830 elif self.op.disk_template == constants.DT_BLOCK:
10831 # Normalize and de-duplicate device paths
10832 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10833 for disk in self.disks])
10834 if len(all_disks) != len(self.disks):
10835 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10836 errors.ECODE_INVAL)
10837 baddisks = [d for d in all_disks
10838 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10840 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10841 " cannot be adopted" %
10842 (utils.CommaJoin(baddisks),
10843 constants.ADOPTABLE_BLOCKDEV_ROOT),
10844 errors.ECODE_INVAL)
10846 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10847 list(all_disks))[pnode.name]
10848 node_disks.Raise("Cannot get block device information from node %s" %
10849 pnode.name)
10850 node_disks = node_disks.payload
10851 delta = all_disks.difference(node_disks.keys())
10853 raise errors.OpPrereqError("Missing block device(s): %s" %
10854 utils.CommaJoin(delta),
10855 errors.ECODE_INVAL)
10856 for dsk in self.disks:
10857 dsk[constants.IDISK_SIZE] = \
10858 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10860 # Verify instance specs
10861 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10863 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10864 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10865 constants.ISPEC_DISK_COUNT: len(self.disks),
10866 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10867 for disk in self.disks],
10868 constants.ISPEC_NIC_COUNT: len(self.nics),
10869 constants.ISPEC_SPINDLE_USE: spindle_use,
10872 group_info = self.cfg.GetNodeGroup(pnode.group)
10873 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10874 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10875 if not self.op.ignore_ipolicy and res:
10876 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10877 " policy: %s") % (pnode.group,
10878 utils.CommaJoin(res)),
10879 errors.ECODE_INVAL)
10881 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10883 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10884 # check OS parameters (remotely)
10885 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10887 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10889 #TODO: _CheckExtParams (remotely)
10890 # Check parameters for extstorage
10892 # memory check on primary node
10893 #TODO(dynmem): use MINMEM for checking
10894 if self.op.start:
10895 _CheckNodeFreeMemory(self, self.pnode.name,
10896 "creating instance %s" % self.op.instance_name,
10897 self.be_full[constants.BE_MAXMEM],
10898 self.op.hypervisor)
10900 self.dry_run_result = list(nodenames)
10902 def Exec(self, feedback_fn):
10903 """Create and add the instance to the cluster.
10906 instance = self.op.instance_name
10907 pnode_name = self.pnode.name
10909 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10910 self.owned_locks(locking.LEVEL_NODE)), \
10911 "Node locks differ from node resource locks"
10912 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10914 ht_kind = self.op.hypervisor
10915 if ht_kind in constants.HTS_REQ_PORT:
10916 network_port = self.cfg.AllocatePort()
10917 else:
10918 network_port = None
10920 # This is ugly but we got a chicken-egg problem here
10921 # We can only take the group disk parameters, as the instance
10922 # has no disks yet (we are generating them right here).
10923 node = self.cfg.GetNodeInfo(pnode_name)
10924 nodegroup = self.cfg.GetNodeGroup(node.group)
10925 disks = _GenerateDiskTemplate(self,
10926 self.op.disk_template,
10927 instance, pnode_name,
10928 self.secondaries,
10929 self.disks,
10930 self.instance_file_storage_dir,
10931 self.op.file_driver,
10932 0,
10933 feedback_fn,
10934 self.cfg.GetGroupDiskParams(nodegroup))
10936 iobj = objects.Instance(name=instance, os=self.op.os_type,
10937 primary_node=pnode_name,
10938 nics=self.nics, disks=disks,
10939 disk_template=self.op.disk_template,
10940 admin_state=constants.ADMINST_DOWN,
10941 network_port=network_port,
10942 beparams=self.op.beparams,
10943 hvparams=self.op.hvparams,
10944 hypervisor=self.op.hypervisor,
10945 osparams=self.op.osparams,
10946 )
10949 for tag in self.op.tags:
10950 iobj.AddTag(tag)
10952 if self.adopt_disks:
10953 if self.op.disk_template == constants.DT_PLAIN:
10954 # rename LVs to the newly-generated names; we need to construct
10955 # 'fake' LV disks with the old data, plus the new unique_id
10956 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10957 rename_to = []
10958 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10959 rename_to.append(t_dsk.logical_id)
10960 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10961 self.cfg.SetDiskID(t_dsk, pnode_name)
10962 result = self.rpc.call_blockdev_rename(pnode_name,
10963 zip(tmp_disks, rename_to))
10964 result.Raise("Failed to rename adoped LVs")
10966 feedback_fn("* creating instance disks...")
10968 _CreateDisks(self, iobj)
10969 except errors.OpExecError:
10970 self.LogWarning("Device creation failed, reverting...")
10972 _RemoveDisks(self, iobj)
10974 self.cfg.ReleaseDRBDMinors(instance)
10977 feedback_fn("adding instance %s to cluster config" % instance)
10979 self.cfg.AddInstance(iobj, self.proc.GetECId())
10981 # Declare that we don't want to remove the instance lock anymore, as we've
10982 # added the instance to the config
10983 del self.remove_locks[locking.LEVEL_INSTANCE]
10985 if self.op.mode == constants.INSTANCE_IMPORT:
10986 # Release unused nodes
10987 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10989 # Release all nodes
10990 _ReleaseLocks(self, locking.LEVEL_NODE)
10993 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10994 feedback_fn("* wiping instance disks...")
10996 _WipeDisks(self, iobj)
10997 except errors.OpExecError, err:
10998 logging.exception("Wiping disks failed")
10999 self.LogWarning("Wiping instance disks failed (%s)", err)
11003 # Something is already wrong with the disks, don't do anything else
11005 elif self.op.wait_for_sync:
11006 disk_abort = not _WaitForSync(self, iobj)
11007 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11008 # make sure the disks are not degraded (still sync-ing is ok)
11009 feedback_fn("* checking mirrors status")
11010 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11015 _RemoveDisks(self, iobj)
11016 self.cfg.RemoveInstance(iobj.name)
11017 # Make sure the instance lock gets removed
11018 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11019 raise errors.OpExecError("There are some degraded disks for"
11022 # Release all node resource locks
11023 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11025 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11026 # we need to set the disks ID to the primary node, since the
11027 # preceding code might or might not have done it, depending on
11028 # disk template and other options
11029 for disk in iobj.disks:
11030 self.cfg.SetDiskID(disk, pnode_name)
11031 if self.op.mode == constants.INSTANCE_CREATE:
11032 if not self.op.no_install:
11033 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11034 not self.op.wait_for_sync)
11036 feedback_fn("* pausing disk sync to install instance OS")
11037 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11040 for idx, success in enumerate(result.payload):
11042 logging.warn("pause-sync of instance %s for disk %d failed",
11045 feedback_fn("* running the instance OS create scripts...")
11046 # FIXME: pass debug option from opcode to backend
11048 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11049 self.op.debug_level)
11051 feedback_fn("* resuming disk sync")
11052 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11055 for idx, success in enumerate(result.payload):
11057 logging.warn("resume-sync of instance %s for disk %d failed",
11060 os_add_result.Raise("Could not add os for instance %s"
11061 " on node %s" % (instance, pnode_name))
11064 if self.op.mode == constants.INSTANCE_IMPORT:
11065 feedback_fn("* running the instance OS import scripts...")
11069 for idx, image in enumerate(self.src_images):
11073 # FIXME: pass debug option from opcode to backend
11074 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11075 constants.IEIO_FILE, (image, ),
11076 constants.IEIO_SCRIPT,
11077 (iobj.disks[idx], idx),
11079 transfers.append(dt)
11082 masterd.instance.TransferInstanceData(self, feedback_fn,
11083 self.op.src_node, pnode_name,
11084 self.pnode.secondary_ip,
11086 if not compat.all(import_result):
11087 self.LogWarning("Some disks for instance %s on node %s were not"
11088 " imported successfully" % (instance, pnode_name))
11090 rename_from = self._old_instance_name
11092 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11093 feedback_fn("* preparing remote import...")
11094 # The source cluster will stop the instance before attempting to make
11095 # a connection. In some cases stopping an instance can take a long
11096 # time, hence the shutdown timeout is added to the connection timeout
11098 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11099 self.op.source_shutdown_timeout)
11100 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11102 assert iobj.primary_node == self.pnode.name
11104 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11105 self.source_x509_ca,
11106 self._cds, timeouts)
11107 if not compat.all(disk_results):
11108 # TODO: Should the instance still be started, even if some disks
11109 # failed to import (valid for local imports, too)?
11110 self.LogWarning("Some disks for instance %s on node %s were not"
11111 " imported successfully" % (instance, pnode_name))
11113 rename_from = self.source_instance_name
11116 # also checked in the prereq part
11117 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11120 # Run rename script on newly imported instance
11121 assert iobj.name == instance
11122 feedback_fn("Running rename script for %s" % instance)
11123 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11125 self.op.debug_level)
11126 if result.fail_msg:
11127 self.LogWarning("Failed to run rename script for %s on node"
11128 " %s: %s" % (instance, pnode_name, result.fail_msg))
11130 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11133 iobj.admin_state = constants.ADMINST_UP
11134 self.cfg.Update(iobj, feedback_fn)
11135 logging.info("Starting instance %s on node %s", instance, pnode_name)
11136 feedback_fn("* starting instance...")
11137 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11139 result.Raise("Could not start instance")
11141 return list(iobj.all_nodes)
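# A minimal sketch of the pause/resume pattern used above while installing
# the OS; it assumes an RPC runner exposing call_blockdev_pause_resume_sync
# with a (node, disks, pause) signature and a per-disk boolean payload, as
# the calls above suggest (illustrative helper, not used by this module):
def _ExamplePauseResumeSync(rpc_runner, node_name, disks, pause):
  """Toggles disk synchronization, logging per-disk failures."""
  result = rpc_runner.call_blockdev_pause_resume_sync(node_name, disks, pause)
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("%s-sync of disk %d on node %s failed",
                   "pause" if pause else "resume", idx, node_name)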
11144 class LUInstanceMultiAlloc(NoHooksLU):
11145 """Allocates multiple instances at the same time.
11150 def CheckArguments(self):
11151 """Check arguments.
11155 for inst in self.op.instances:
11156 if inst.iallocator is not None:
11157 raise errors.OpPrereqError("iallocator must not be set on"
11158 " instance objects", errors.ECODE_INVAL)
11159 nodes.append(bool(inst.pnode))
11160 if inst.disk_template in constants.DTS_INT_MIRROR:
11161 nodes.append(bool(inst.snode))
11163 has_nodes = compat.any(nodes)
11164 if compat.all(nodes) ^ has_nodes:
11165 raise errors.OpPrereqError("Some instance objects provide"
11166 " pnode/snode while others do not",
11167 errors.ECODE_INVAL)
11169 if self.op.iallocator is None:
11170 default_iallocator = self.cfg.GetDefaultIAllocator()
11171 if default_iallocator and has_nodes:
11172 self.op.iallocator = default_iallocator
11174 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11175 " given and no cluster-wide default"
11176 " iallocator found; please specify either"
11177 " an iallocator or nodes on the instances"
11178 " or set a cluster-wide default iallocator",
11179 errors.ECODE_INVAL)
11181 _CheckOpportunisticLocking(self.op)
11183 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11185 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11186 utils.CommaJoin(dups), errors.ECODE_INVAL)
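# utils.FindDuplicates, used above, returns the values occurring more than
# once in a sequence; a simplified, illustrative equivalent (the real
# helper lives in ganeti.utils):
def _ExampleFindDuplicates(seq):
  """Returns the duplicated elements of seq (order not preserved)."""
  seen = set()
  duplicates = set()
  for item in seq:
    if item in seen:
      duplicates.add(item)
    seen.add(item)
  return list(duplicates)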
11188 def ExpandNames(self):
11189 """Calculate the locks.
11192 self.share_locks = _ShareAll()
11193 self.needed_locks = {
11194 # iallocator will select nodes and even if no iallocator is used,
11195 # collisions with LUInstanceCreate should be avoided
11196 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11199 if self.op.iallocator:
11200 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11201 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11203 if self.op.opportunistic_locking:
11204 self.opportunistic_locks[locking.LEVEL_NODE] = True
11205 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11208 for inst in self.op.instances:
11209 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11210 nodeslist.append(inst.pnode)
11211 if inst.snode is not None:
11212 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11213 nodeslist.append(inst.snode)
11215 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11216 # Lock resources of instance's primary and secondary nodes (copy to
11217 # prevent accidental modification)
11218 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11220 def CheckPrereq(self):
11221 """Check prerequisite.
11224 cluster = self.cfg.GetClusterInfo()
11225 default_vg = self.cfg.GetVGName()
11226 ec_id = self.proc.GetECId()
11228 if self.op.opportunistic_locking:
11229 # Only consider nodes for which a lock is held
11230 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11232 node_whitelist = None
11234 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11235 _ComputeNics(op, cluster, None,
11237 _ComputeFullBeParams(op, cluster),
11239 for op in self.op.instances]
11241 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11242 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11244 ial.Run(self.op.iallocator)
11246 if not ial.success:
11247 raise errors.OpPrereqError("Can't compute nodes using"
11248 " iallocator '%s': %s" %
11249 (self.op.iallocator, ial.info),
11250 errors.ECODE_NORES)
11252 self.ia_result = ial.result
11254 if self.op.dry_run:
11255 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11256 constants.JOB_IDS_KEY: [],
11259 def _ConstructPartialResult(self):
11260 """Contructs the partial result.
11263 (allocatable, failed) = self.ia_result
11265 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11266 map(compat.fst, allocatable),
11267 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11270 def Exec(self, feedback_fn):
11271 """Executes the opcode.
11274 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11275 (allocatable, failed) = self.ia_result
11278 for (name, nodes) in allocatable:
11279 op = op2inst.pop(name)
11282 (op.pnode, op.snode) = nodes
11284 (op.pnode,) = nodes
11288 missing = set(op2inst.keys()) - set(failed)
11289 assert not missing, \
11290 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11292 return ResultWithJobs(jobs, **self._ConstructPartialResult())
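# The opcode result thus combines the submitted job IDs with the partial
# allocation result; schematically (key names are illustrative, the real
# ones come from the constants used above):
#   {JOB_IDS_KEY: [...], ALLOCATABLE_KEY: ["inst1", ...], FAILED_KEY: [...]}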
11295 def _CheckRADOSFreeSpace():
11296 """Compute disk size requirements inside the RADOS cluster.
11299 # For the RADOS cluster we assume there is always enough space.
11303 class LUInstanceConsole(NoHooksLU):
11304 """Connect to an instance's console.
11306 This is somewhat special in that it returns the command line that
11307 you need to run on the master node in order to connect to the console.
11313 def ExpandNames(self):
11314 self.share_locks = _ShareAll()
11315 self._ExpandAndLockInstance()
11317 def CheckPrereq(self):
11318 """Check prerequisites.
11320 This checks that the instance is in the cluster.
11323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11324 assert self.instance is not None, \
11325 "Cannot retrieve locked instance %s" % self.op.instance_name
11326 _CheckNodeOnline(self, self.instance.primary_node)
11328 def Exec(self, feedback_fn):
11329 """Connect to the console of an instance
11332 instance = self.instance
11333 node = instance.primary_node
11335 node_insts = self.rpc.call_instance_list([node],
11336 [instance.hypervisor])[node]
11337 node_insts.Raise("Can't get node information from %s" % node)
11339 if instance.name not in node_insts.payload:
11340 if instance.admin_state == constants.ADMINST_UP:
11341 state = constants.INSTST_ERRORDOWN
11342 elif instance.admin_state == constants.ADMINST_DOWN:
11343 state = constants.INSTST_ADMINDOWN
11345 state = constants.INSTST_ADMINOFFLINE
11346 raise errors.OpExecError("Instance %s is not running (state %s)" %
11347 (instance.name, state))
11349 logging.debug("Connecting to console of %s on %s", instance.name, node)
11351 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11354 def _GetInstanceConsole(cluster, instance):
11355 """Returns console information for an instance.
11357 @type cluster: L{objects.Cluster}
11358 @type instance: L{objects.Instance}
11362 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11363 # beparams and hvparams are passed separately, to avoid editing the
11364 # instance and then saving the defaults in the instance itself.
11365 hvparams = cluster.FillHV(instance)
11366 beparams = cluster.FillBE(instance)
11367 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11369 assert console.instance == instance.name
11370 assert console.Validate()
11372 return console.ToDict()
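# Illustrative usage from within an LU's Exec (instance lock held):
#   cluster = self.cfg.GetClusterInfo()
#   console_dict = _GetInstanceConsole(cluster, self.instance)
# The returned dictionary is the serialized console object validated above.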
11375 class LUInstanceReplaceDisks(LogicalUnit):
11376 """Replace the disks of an instance.
11379 HPATH = "mirrors-replace"
11380 HTYPE = constants.HTYPE_INSTANCE
11383 def CheckArguments(self):
11384 """Check arguments.
11387 remote_node = self.op.remote_node
11388 ialloc = self.op.iallocator
11389 if self.op.mode == constants.REPLACE_DISK_CHG:
11390 if remote_node is None and ialloc is None:
11391 raise errors.OpPrereqError("When changing the secondary either an"
11392 " iallocator script must be used or the"
11393 " new node given", errors.ECODE_INVAL)
11395 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11397 elif remote_node is not None or ialloc is not None:
11398 # Not replacing the secondary
11399 raise errors.OpPrereqError("The iallocator and new node options can"
11400 " only be used when changing the"
11401 " secondary node", errors.ECODE_INVAL)
11403 def ExpandNames(self):
11404 self._ExpandAndLockInstance()
11406 assert locking.LEVEL_NODE not in self.needed_locks
11407 assert locking.LEVEL_NODE_RES not in self.needed_locks
11408 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11410 assert self.op.iallocator is None or self.op.remote_node is None, \
11411 "Conflicting options"
11413 if self.op.remote_node is not None:
11414 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11416 # Warning: do not remove the locking of the new secondary here
11417 # unless DRBD8.AddChildren is changed to work in parallel;
11418 # currently it doesn't since parallel invocations of
11419 # FindUnusedMinor will conflict
11420 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11421 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11423 self.needed_locks[locking.LEVEL_NODE] = []
11424 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11426 if self.op.iallocator is not None:
11427 # iallocator will select a new node in the same group
11428 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11429 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11431 self.needed_locks[locking.LEVEL_NODE_RES] = []
11433 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11434 self.op.iallocator, self.op.remote_node,
11435 self.op.disks, self.op.early_release,
11436 self.op.ignore_ipolicy)
11438 self.tasklets = [self.replacer]
11440 def DeclareLocks(self, level):
11441 if level == locking.LEVEL_NODEGROUP:
11442 assert self.op.remote_node is None
11443 assert self.op.iallocator is not None
11444 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11446 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11447 # Lock all groups used by instance optimistically; this requires going
11448 # via the node before it's locked, requiring verification later on
11449 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11450 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11452 elif level == locking.LEVEL_NODE:
11453 if self.op.iallocator is not None:
11454 assert self.op.remote_node is None
11455 assert not self.needed_locks[locking.LEVEL_NODE]
11456 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11458 # Lock member nodes of all locked groups
11459 self.needed_locks[locking.LEVEL_NODE] = \
11461 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11462 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11464 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11466 self._LockInstancesNodes()
11468 elif level == locking.LEVEL_NODE_RES:
11470 self.needed_locks[locking.LEVEL_NODE_RES] = \
11471 self.needed_locks[locking.LEVEL_NODE]
11473 def BuildHooksEnv(self):
11474 """Build hooks env.
11476 This runs on the master, the primary and all the secondaries.
11479 instance = self.replacer.instance
11481 "MODE": self.op.mode,
11482 "NEW_SECONDARY": self.op.remote_node,
11483 "OLD_SECONDARY": instance.secondary_nodes[0],
11485 env.update(_BuildInstanceHookEnvByObject(self, instance))
11488 def BuildHooksNodes(self):
11489 """Build hooks nodes.
11492 instance = self.replacer.instance
11494 self.cfg.GetMasterNode(),
11495 instance.primary_node,
11497 if self.op.remote_node is not None:
11498 nl.append(self.op.remote_node)
11501 def CheckPrereq(self):
11502 """Check prerequisites.
11505 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11506 self.op.iallocator is None)
11508 # Verify if node group locks are still correct
11509 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11511 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11513 return LogicalUnit.CheckPrereq(self)
11516 class TLReplaceDisks(Tasklet):
11517 """Replaces disks for an instance.
11519 Note: Locking is not within the scope of this class.
11522 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11523 disks, early_release, ignore_ipolicy):
11524 """Initializes this class.
11527 Tasklet.__init__(self, lu)
11530 self.instance_name = instance_name
11532 self.iallocator_name = iallocator_name
11533 self.remote_node = remote_node
11535 self.early_release = early_release
11536 self.ignore_ipolicy = ignore_ipolicy
11539 self.instance = None
11540 self.new_node = None
11541 self.target_node = None
11542 self.other_node = None
11543 self.remote_node_info = None
11544 self.node_secondary_ip = None
11547 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11548 """Compute a new secondary node using an IAllocator.
11551 req = iallocator.IAReqRelocate(name=instance_name,
11552 relocate_from=list(relocate_from))
11553 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11555 ial.Run(iallocator_name)
11557 if not ial.success:
11558 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11559 " %s" % (iallocator_name, ial.info),
11560 errors.ECODE_NORES)
11562 remote_node_name = ial.result[0]
11564 lu.LogInfo("Selected new secondary for instance '%s': %s",
11565 instance_name, remote_node_name)
11567 return remote_node_name
11569 def _FindFaultyDisks(self, node_name):
11570 """Wrapper for L{_FindFaultyInstanceDisks}.
11573 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11576 def _CheckDisksActivated(self, instance):
11577 """Checks if the instance disks are activated.
11579 @param instance: The instance to check disks
11580 @return: True if they are activated, False otherwise
11583 nodes = instance.all_nodes
11585 for idx, dev in enumerate(instance.disks):
11587 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11588 self.cfg.SetDiskID(dev, node)
11590 result = _BlockdevFind(self, node, dev, instance)
11594 elif result.fail_msg or not result.payload:
11599 def CheckPrereq(self):
11600 """Check prerequisites.
11602 This checks that the instance is in the cluster.
11605 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11606 assert instance is not None, \
11607 "Cannot retrieve locked instance %s" % self.instance_name
11609 if instance.disk_template != constants.DT_DRBD8:
11610 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11611 " instances", errors.ECODE_INVAL)
11613 if len(instance.secondary_nodes) != 1:
11614 raise errors.OpPrereqError("The instance has a strange layout,"
11615 " expected one secondary but found %d" %
11616 len(instance.secondary_nodes),
11617 errors.ECODE_FAULT)
11619 instance = self.instance
11620 secondary_node = instance.secondary_nodes[0]
11622 if self.iallocator_name is None:
11623 remote_node = self.remote_node
11625 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11626 instance.name, instance.secondary_nodes)
11628 if remote_node is None:
11629 self.remote_node_info = None
11631 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11632 "Remote node '%s' is not locked" % remote_node
11634 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11635 assert self.remote_node_info is not None, \
11636 "Cannot retrieve locked node %s" % remote_node
11638 if remote_node == self.instance.primary_node:
11639 raise errors.OpPrereqError("The specified node is the primary node of"
11640 " the instance", errors.ECODE_INVAL)
11642 if remote_node == secondary_node:
11643 raise errors.OpPrereqError("The specified node is already the"
11644 " secondary node of the instance",
11645 errors.ECODE_INVAL)
11647 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11648 constants.REPLACE_DISK_CHG):
11649 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11650 errors.ECODE_INVAL)
11652 if self.mode == constants.REPLACE_DISK_AUTO:
11653 if not self._CheckDisksActivated(instance):
11654 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11655 " first" % self.instance_name,
11656 errors.ECODE_STATE)
11657 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11658 faulty_secondary = self._FindFaultyDisks(secondary_node)
11660 if faulty_primary and faulty_secondary:
11661 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11662 " one node and can not be repaired"
11663 " automatically" % self.instance_name,
11664 errors.ECODE_STATE)
11667 self.disks = faulty_primary
11668 self.target_node = instance.primary_node
11669 self.other_node = secondary_node
11670 check_nodes = [self.target_node, self.other_node]
11671 elif faulty_secondary:
11672 self.disks = faulty_secondary
11673 self.target_node = secondary_node
11674 self.other_node = instance.primary_node
11675 check_nodes = [self.target_node, self.other_node]
11681 # Non-automatic modes
11682 if self.mode == constants.REPLACE_DISK_PRI:
11683 self.target_node = instance.primary_node
11684 self.other_node = secondary_node
11685 check_nodes = [self.target_node, self.other_node]
11687 elif self.mode == constants.REPLACE_DISK_SEC:
11688 self.target_node = secondary_node
11689 self.other_node = instance.primary_node
11690 check_nodes = [self.target_node, self.other_node]
11692 elif self.mode == constants.REPLACE_DISK_CHG:
11693 self.new_node = remote_node
11694 self.other_node = instance.primary_node
11695 self.target_node = secondary_node
11696 check_nodes = [self.new_node, self.other_node]
11698 _CheckNodeNotDrained(self.lu, remote_node)
11699 _CheckNodeVmCapable(self.lu, remote_node)
11701 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11702 assert old_node_info is not None
11703 if old_node_info.offline and not self.early_release:
11704 # doesn't make sense to delay the release
11705 self.early_release = True
11706 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11707 " early-release mode", secondary_node)
11710 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11713 # If not specified all disks should be replaced
11715 self.disks = range(len(self.instance.disks))
11717 # TODO: This is ugly, but right now we can't distinguish between internally
11718 # submitted opcodes and external ones. We should fix that.
11719 if self.remote_node_info:
11720 # We change the node; let's verify it still meets the instance policy
11721 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11722 cluster = self.cfg.GetClusterInfo()
11723 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11725 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11726 ignore=self.ignore_ipolicy)
11728 for node in check_nodes:
11729 _CheckNodeOnline(self.lu, node)
11731 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11734 if node_name is not None)
11736 # Release unneeded node and node resource locks
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11738 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11739 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11741 # Release any owned node group
11742 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11744 # Check whether disks are valid
11745 for disk_idx in self.disks:
11746 instance.FindDisk(disk_idx)
11748 # Get secondary node IP addresses
11749 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11750 in self.cfg.GetMultiNodeInfo(touched_nodes))
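# The resulting mapping has the form (illustrative values):
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}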
11752 def Exec(self, feedback_fn):
11753 """Execute disk replacement.
11755 This dispatches the disk replacement to the appropriate handler.
11759 # Verify owned locks before starting operation
11760 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11761 assert set(owned_nodes) == set(self.node_secondary_ip), \
11762 ("Incorrect node locks, owning %s, expected %s" %
11763 (owned_nodes, self.node_secondary_ip.keys()))
11764 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11765 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11766 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11768 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11769 assert list(owned_instances) == [self.instance_name], \
11770 "Instance '%s' not locked" % self.instance_name
11772 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11773 "Should not own any node group lock at this point"
11776 feedback_fn("No disks need replacement for instance '%s'" %
11777 self.instance.name)
11780 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11781 (utils.CommaJoin(self.disks), self.instance.name))
11782 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11783 feedback_fn("Current secondary node: %s" %
11784 utils.CommaJoin(self.instance.secondary_nodes))
11786 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11788 # Activate the instance disks if we're replacing them on a down instance
11790 _StartInstanceDisks(self.lu, self.instance, True)
11793 # Should we replace the secondary node?
11794 if self.new_node is not None:
11795 fn = self._ExecDrbd8Secondary
11797 fn = self._ExecDrbd8DiskOnly
11799 result = fn(feedback_fn)
11801 # Deactivate the instance disks if we're replacing them on a down instance
11804 _SafeShutdownInstanceDisks(self.lu, self.instance)
11806 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11809 # Verify owned locks
11810 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11811 nodes = frozenset(self.node_secondary_ip)
11812 assert ((self.early_release and not owned_nodes) or
11813 (not self.early_release and not (set(owned_nodes) - nodes))), \
11814 ("Not owning the correct locks, early_release=%s, owned=%r,"
11815 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11819 def _CheckVolumeGroup(self, nodes):
11820 self.lu.LogInfo("Checking volume groups")
11822 vgname = self.cfg.GetVGName()
11824 # Make sure volume group exists on all involved nodes
11825 results = self.rpc.call_vg_list(nodes)
11827 raise errors.OpExecError("Can't list volume groups on the nodes")
11830 res = results[node]
11831 res.Raise("Error checking node %s" % node)
11832 if vgname not in res.payload:
11833 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11836 def _CheckDisksExistence(self, nodes):
11837 # Check disk existence
11838 for idx, dev in enumerate(self.instance.disks):
11839 if idx not in self.disks:
11843 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11844 self.cfg.SetDiskID(dev, node)
11846 result = _BlockdevFind(self, node, dev, self.instance)
11848 msg = result.fail_msg
11849 if msg or not result.payload:
11851 msg = "disk not found"
11852 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11855 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11856 for idx, dev in enumerate(self.instance.disks):
11857 if idx not in self.disks:
11860 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11863 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11864 on_primary, ldisk=ldisk):
11865 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11866 " replace disks for instance %s" %
11867 (node_name, self.instance.name))
11869 def _CreateNewStorage(self, node_name):
11870 """Create new storage on the primary or secondary node.
11872 This is only used for same-node replaces, not for changing the
11873 secondary node, hence we don't want to modify the existing disk.
11878 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11879 for idx, dev in enumerate(disks):
11880 if idx not in self.disks:
11883 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11885 self.cfg.SetDiskID(dev, node_name)
11887 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11888 names = _GenerateUniqueNames(self.lu, lv_names)
11890 (data_disk, meta_disk) = dev.children
11891 vg_data = data_disk.logical_id[0]
11892 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11893 logical_id=(vg_data, names[0]),
11894 params=data_disk.params)
11895 vg_meta = meta_disk.logical_id[0]
11896 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11897 size=constants.DRBD_META_SIZE,
11898 logical_id=(vg_meta, names[1]),
11899 params=meta_disk.params)
11901 new_lvs = [lv_data, lv_meta]
11902 old_lvs = [child.Copy() for child in dev.children]
11903 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11904 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11906 # we pass force_create=True to force the LVM creation
11907 for new_lv in new_lvs:
11908 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11909 _GetInstanceInfoText(self.instance), False,
11914 def _CheckDevices(self, node_name, iv_names):
11915 for name, (dev, _, _) in iv_names.iteritems():
11916 self.cfg.SetDiskID(dev, node_name)
11918 result = _BlockdevFind(self, node_name, dev, self.instance)
11920 msg = result.fail_msg
11921 if msg or not result.payload:
11923 msg = "disk not found"
11924 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11927 if result.payload.is_degraded:
11928 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11930 def _RemoveOldStorage(self, node_name, iv_names):
11931 for name, (_, old_lvs, _) in iv_names.iteritems():
11932 self.lu.LogInfo("Remove logical volumes for %s", name)
11935 self.cfg.SetDiskID(lv, node_name)
11937 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11939 self.lu.LogWarning("Can't remove old LV: %s", msg,
11940 hint="remove unused LVs manually")
11942 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11943 """Replace a disk on the primary or secondary for DRBD 8.
11945 The algorithm for replace is quite complicated:
11947 1. for each disk to be replaced:
11949 1. create new LVs on the target node with unique names
11950 1. detach old LVs from the drbd device
11951 1. rename old LVs to name_replaced.<time_t>
11952 1. rename new LVs to old LVs
11953 1. attach the new LVs (with the old names now) to the drbd device
11955 1. wait for sync across all devices
11957 1. for each modified disk:
11959 1. remove old LVs (which have the name name_replaced.<time_t>)
11961 Failures are not very well handled.
11966 # Step: check device activation
11967 self.lu.LogStep(1, steps_total, "Check device existence")
11968 self._CheckDisksExistence([self.other_node, self.target_node])
11969 self._CheckVolumeGroup([self.target_node, self.other_node])
11971 # Step: check other node consistency
11972 self.lu.LogStep(2, steps_total, "Check peer consistency")
11973 self._CheckDisksConsistency(self.other_node,
11974 self.other_node == self.instance.primary_node,
11977 # Step: create new storage
11978 self.lu.LogStep(3, steps_total, "Allocate new storage")
11979 iv_names = self._CreateNewStorage(self.target_node)
11981 # Step: for each lv, detach+rename*2+attach
11982 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11983 for dev, old_lvs, new_lvs in iv_names.itervalues():
11984 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11986 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11988 result.Raise("Can't detach drbd from local storage on node"
11989 " %s for device %s" % (self.target_node, dev.iv_name))
11991 #cfg.Update(instance)
11993 # ok, we created the new LVs, so now we know we have the needed
11994 # storage; as such, we proceed on the target node to rename
11995 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11996 # using the assumption that logical_id == physical_id (which in
11997 # turn is the unique_id on that node)
11999 # FIXME(iustin): use a better name for the replaced LVs
12000 temp_suffix = int(time.time())
12001 ren_fn = lambda d, suff: (d.physical_id[0],
12002 d.physical_id[1] + "_replaced-%s" % suff)
12004 # Build the rename list based on what LVs exist on the node
12005 rename_old_to_new = []
12006 for to_ren in old_lvs:
12007 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12008 if not result.fail_msg and result.payload:
12010 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12012 self.lu.LogInfo("Renaming the old LVs on the target node")
12013 result = self.rpc.call_blockdev_rename(self.target_node,
12015 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12017 # Now we rename the new LVs to the old LVs
12018 self.lu.LogInfo("Renaming the new LVs on the target node")
12019 rename_new_to_old = [(new, old.physical_id)
12020 for old, new in zip(old_lvs, new_lvs)]
12021 result = self.rpc.call_blockdev_rename(self.target_node,
12023 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12025 # Intermediate steps of in-memory modifications
12026 for old, new in zip(old_lvs, new_lvs):
12027 new.logical_id = old.logical_id
12028 self.cfg.SetDiskID(new, self.target_node)
12030 # We need to modify old_lvs so that removal later removes the
12031 # right LVs, not the newly added ones; note that old_lvs is a copy
12033 for disk in old_lvs:
12034 disk.logical_id = ren_fn(disk, temp_suffix)
12035 self.cfg.SetDiskID(disk, self.target_node)
12037 # Now that the new lvs have the old name, we can add them to the device
12038 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12039 result = self.rpc.call_blockdev_addchildren(self.target_node,
12040 (dev, self.instance), new_lvs)
12041 msg = result.fail_msg
12043 for new_lv in new_lvs:
12044 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12047 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12048 hint=("cleanup manually the unused logical"
12050 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12052 cstep = itertools.count(5)
12054 if self.early_release:
12055 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12056 self._RemoveOldStorage(self.target_node, iv_names)
12057 # TODO: Check if releasing locks early still makes sense
12058 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12060 # Release all resource locks except those used by the instance
12061 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12062 keep=self.node_secondary_ip.keys())
12064 # Release all node locks while waiting for sync
12065 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12067 # TODO: Can the instance lock be downgraded here? Take the optional disk
12068 # shutdown in the caller into consideration.
12071 # This can fail as the old devices are degraded and _WaitForSync
12072 # returns a combined result over all disks, so we don't check its return value
12073 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12074 _WaitForSync(self.lu, self.instance)
12076 # Check all devices manually
12077 self._CheckDevices(self.instance.primary_node, iv_names)
12079 # Step: remove old storage
12080 if not self.early_release:
12081 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12082 self._RemoveOldStorage(self.target_node, iv_names)
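# The detach/rename/rename/attach sequence above can be summarised by the
# following sketch (hypothetical helper; disk objects, configuration
# updates and rollback are omitted for brevity):
def _ExampleSwapLvNames(old_names, new_names, suffix):
  """Returns the two rename maps used to swap old and new LVs."""
  # first move the old LVs out of the way ...
  rename_old_to_new = [(old, "%s_replaced-%s" % (old, suffix))
                       for old in old_names]
  # ... then give the new LVs the original names
  rename_new_to_old = zip(new_names, old_names)
  return (rename_old_to_new, rename_new_to_old)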
12084 def _ExecDrbd8Secondary(self, feedback_fn):
12085 """Replace the secondary node for DRBD 8.
12087 The algorithm for replace is quite complicated:
12088 - for all disks of the instance:
12089 - create new LVs on the new node with same names
12090 - shutdown the drbd device on the old secondary
12091 - disconnect the drbd network on the primary
12092 - create the drbd device on the new secondary
12093 - network attach the drbd on the primary, using an artifice:
12094 the drbd code for Attach() will connect to the network if it
12095 finds a device which is connected to the good local disks but
12096 not network enabled
12097 - wait for sync across all devices
12098 - remove all disks from the old secondary
12100 Failures are not very well handled.
12105 pnode = self.instance.primary_node
12107 # Step: check device activation
12108 self.lu.LogStep(1, steps_total, "Check device existence")
12109 self._CheckDisksExistence([self.instance.primary_node])
12110 self._CheckVolumeGroup([self.instance.primary_node])
12112 # Step: check other node consistency
12113 self.lu.LogStep(2, steps_total, "Check peer consistency")
12114 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12116 # Step: create new storage
12117 self.lu.LogStep(3, steps_total, "Allocate new storage")
12118 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12119 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12120 for idx, dev in enumerate(disks):
12121 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12122 (self.new_node, idx))
12123 # we pass force_create=True to force LVM creation
12124 for new_lv in dev.children:
12125 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12126 True, _GetInstanceInfoText(self.instance), False,
12129 # Step 4: drbd minors and drbd setup changes
12130 # after this, we must manually remove the drbd minors on both the
12131 # error and the success paths
12132 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12133 minors = self.cfg.AllocateDRBDMinor([self.new_node
12134 for dev in self.instance.disks],
12135 self.instance.name)
12136 logging.debug("Allocated minors %r", minors)
12139 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12140 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12141 (self.new_node, idx))
12142 # create new devices on new_node; note that we create two IDs:
12143 # one without port, so the drbd will be activated without
12144 # networking information on the new node at this stage, and one
12145 # with network, for the later activation in step 4
12146 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12147 if self.instance.primary_node == o_node1:
12150 assert self.instance.primary_node == o_node2, "Three-node instance?"
12153 new_alone_id = (self.instance.primary_node, self.new_node, None,
12154 p_minor, new_minor, o_secret)
12155 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12156 p_minor, new_minor, o_secret)
12158 iv_names[idx] = (dev, dev.children, new_net_id)
12159 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12161 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12162 logical_id=new_alone_id,
12163 children=dev.children,
12166 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12169 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12171 _GetInstanceInfoText(self.instance), False,
12173 except errors.GenericError:
12174 self.cfg.ReleaseDRBDMinors(self.instance.name)
12177 # We have the new devices; shut down the drbd on the old secondary
12178 for idx, dev in enumerate(self.instance.disks):
12179 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12180 self.cfg.SetDiskID(dev, self.target_node)
12181 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12182 (dev, self.instance)).fail_msg
12184 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
12185 " node: %s" % (idx, msg),
12186 hint=("Please cleanup this device manually as"
12187 " soon as possible"))
12189 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12190 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12191 self.instance.disks)[pnode]
12193 msg = result.fail_msg
12195 # detaches didn't succeed (unlikely)
12196 self.cfg.ReleaseDRBDMinors(self.instance.name)
12197 raise errors.OpExecError("Can't detach the disks from the network on"
12198 " old node: %s" % (msg,))
12200 # if we managed to detach at least one, we update all the disks of
12201 # the instance to point to the new secondary
12202 self.lu.LogInfo("Updating instance configuration")
12203 for dev, _, new_logical_id in iv_names.itervalues():
12204 dev.logical_id = new_logical_id
12205 self.cfg.SetDiskID(dev, self.instance.primary_node)
12207 self.cfg.Update(self.instance, feedback_fn)
12209 # Release all node locks (the configuration has been updated)
12210 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12212 # and now perform the drbd attach
12213 self.lu.LogInfo("Attaching primary drbds to new secondary"
12214 " (standalone => connected)")
12215 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12217 self.node_secondary_ip,
12218 (self.instance.disks, self.instance),
12219 self.instance.name,
12221 for to_node, to_result in result.items():
12222 msg = to_result.fail_msg
12224 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12226 hint=("please run 'gnt-instance info' to see the"
12227 " status of the disks"))
12229 cstep = itertools.count(5)
12231 if self.early_release:
12232 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12233 self._RemoveOldStorage(self.target_node, iv_names)
12234 # TODO: Check if releasing locks early still makes sense
12235 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12237 # Release all resource locks except those used by the instance
12238 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12239 keep=self.node_secondary_ip.keys())
12241 # TODO: Can the instance lock be downgraded here? Take the optional disk
12242 # shutdown in the caller into consideration.
12245 # This can fail as the old devices are degraded and _WaitForSync
12246 # returns a combined result over all disks, so we don't check its return value
12247 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12248 _WaitForSync(self.lu, self.instance)
12250 # Check all devices manually
12251 self._CheckDevices(self.instance.primary_node, iv_names)
12253 # Step: remove old storage
12254 if not self.early_release:
12255 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12256 self._RemoveOldStorage(self.target_node, iv_names)
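# For reference, a DRBD8 logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret); the "alone" variant built
# in step 4 above carries None instead of the port (illustrative values):
#   ("node1", "node3", None, 0, 4, "secret")   # standalone
#   ("node1", "node3", 11000, 0, 4, "secret")  # network-enabled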
12259 class LURepairNodeStorage(NoHooksLU):
12260 """Repairs the volume group on a node.
12265 def CheckArguments(self):
12266 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12268 storage_type = self.op.storage_type
12270 if (constants.SO_FIX_CONSISTENCY not in
12271 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12272 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
12273 " repaired" % storage_type,
12274 errors.ECODE_INVAL)
12276 def ExpandNames(self):
12277 self.needed_locks = {
12278 locking.LEVEL_NODE: [self.op.node_name],
12281 def _CheckFaultyDisks(self, instance, node_name):
12282 """Ensure faulty disks abort the opcode or at least warn."""
12284 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12286 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12287 " node '%s'" % (instance.name, node_name),
12288 errors.ECODE_STATE)
12289 except errors.OpPrereqError, err:
12290 if self.op.ignore_consistency:
12291 self.LogWarning(str(err.args[0]))
12295 def CheckPrereq(self):
12296 """Check prerequisites.
12299 # Check whether any instance on this node has faulty disks
12300 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12301 if inst.admin_state != constants.ADMINST_UP:
12303 check_nodes = set(inst.all_nodes)
12304 check_nodes.discard(self.op.node_name)
12305 for inst_node_name in check_nodes:
12306 self._CheckFaultyDisks(inst, inst_node_name)
12308 def Exec(self, feedback_fn):
12309 feedback_fn("Repairing storage unit '%s' on %s ..." %
12310 (self.op.name, self.op.node_name))
12312 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12313 result = self.rpc.call_storage_execute(self.op.node_name,
12314 self.op.storage_type, st_args,
12316 constants.SO_FIX_CONSISTENCY)
12317 result.Raise("Failed to repair storage unit '%s' on %s" %
12318 (self.op.name, self.op.node_name))
12321 class LUNodeEvacuate(NoHooksLU):
12322 """Evacuates instances off a list of nodes.
12327 _MODE2IALLOCATOR = {
12328 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12329 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12330 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12332 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12333 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12334 constants.IALLOCATOR_NEVAC_MODES)
12336 def CheckArguments(self):
12337 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12339 def ExpandNames(self):
12340 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12342 if self.op.remote_node is not None:
12343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12344 assert self.op.remote_node
12346 if self.op.remote_node == self.op.node_name:
12347 raise errors.OpPrereqError("Cannot use the evacuated node as a new"
12348 " secondary node", errors.ECODE_INVAL)
12350 if self.op.mode != constants.NODE_EVAC_SEC:
12351 raise errors.OpPrereqError("Without the use of an iallocator, only"
12352 " secondary instances can be evacuated",
12353 errors.ECODE_INVAL)
12356 self.share_locks = _ShareAll()
12357 self.needed_locks = {
12358 locking.LEVEL_INSTANCE: [],
12359 locking.LEVEL_NODEGROUP: [],
12360 locking.LEVEL_NODE: [],
12363 # Determine nodes (via group) optimistically, needs verification once locks
12364 # have been acquired
12365 self.lock_nodes = self._DetermineNodes()
12367 def _DetermineNodes(self):
12368 """Gets the list of nodes to operate on.
12371 if self.op.remote_node is None:
12372 # Iallocator will choose any node(s) in the same group
12373 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12375 group_nodes = frozenset([self.op.remote_node])
12377 # Determine nodes to be locked
12378 return set([self.op.node_name]) | group_nodes
12380 def _DetermineInstances(self):
12381 """Builds list of instances to operate on.
12384 assert self.op.mode in constants.NODE_EVAC_MODES
12386 if self.op.mode == constants.NODE_EVAC_PRI:
12387 # Primary instances only
12388 inst_fn = _GetNodePrimaryInstances
12389 assert self.op.remote_node is None, \
12390 "Evacuating primary instances requires iallocator"
12391 elif self.op.mode == constants.NODE_EVAC_SEC:
12392 # Secondary instances only
12393 inst_fn = _GetNodeSecondaryInstances
12396 assert self.op.mode == constants.NODE_EVAC_ALL
12397 inst_fn = _GetNodeInstances
12398 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12400 raise errors.OpPrereqError("Due to an issue with the iallocator"
12401 " interface it is not possible to evacuate"
12402 " all instances at once; specify explicitly"
12403 " whether to evacuate primary or secondary"
12405 errors.ECODE_INVAL)
12407 return inst_fn(self.cfg, self.op.node_name)
12409 def DeclareLocks(self, level):
12410 if level == locking.LEVEL_INSTANCE:
12411 # Lock instances optimistically, needs verification once node and group
12412 # locks have been acquired
12413 self.needed_locks[locking.LEVEL_INSTANCE] = \
12414 set(i.name for i in self._DetermineInstances())
12416 elif level == locking.LEVEL_NODEGROUP:
12417 # Lock node groups for all potential target nodes optimistically, needs
12418 # verification once nodes have been acquired
12419 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12420 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12422 elif level == locking.LEVEL_NODE:
12423 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12425 def CheckPrereq(self):
12427 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12428 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12429 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12431 need_nodes = self._DetermineNodes()
12433 if not owned_nodes.issuperset(need_nodes):
12434 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12435 " locks were acquired, current nodes are"
12436 " are '%s', used to be '%s'; retry the"
12438 (self.op.node_name,
12439 utils.CommaJoin(need_nodes),
12440 utils.CommaJoin(owned_nodes)),
12441 errors.ECODE_STATE)
12443 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12444 if owned_groups != wanted_groups:
12445 raise errors.OpExecError("Node groups changed since locks were acquired,"
12446 " current groups are '%s', used to be '%s';"
12447 " retry the operation" %
12448 (utils.CommaJoin(wanted_groups),
12449 utils.CommaJoin(owned_groups)))
12451 # Determine affected instances
12452 self.instances = self._DetermineInstances()
12453 self.instance_names = [i.name for i in self.instances]
12455 if set(self.instance_names) != owned_instances:
12456 raise errors.OpExecError("Instances on node '%s' changed since locks"
12457 " were acquired, current instances are '%s',"
12458 " used to be '%s'; retry the operation" %
12459 (self.op.node_name,
12460 utils.CommaJoin(self.instance_names),
12461 utils.CommaJoin(owned_instances)))
12463 if self.instance_names:
12464 self.LogInfo("Evacuating instances from node '%s': %s",
12466 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12468 self.LogInfo("No instances to evacuate from node '%s'",
12471 if self.op.remote_node is not None:
12472 for i in self.instances:
12473 if i.primary_node == self.op.remote_node:
12474 raise errors.OpPrereqError("Node %s is the primary node of"
12475 " instance %s, cannot use it as"
12477 (self.op.remote_node, i.name),
12478 errors.ECODE_INVAL)
12480 def Exec(self, feedback_fn):
12481 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12483 if not self.instance_names:
12484 # No instances to evacuate
12487 elif self.op.iallocator is not None:
12488 # TODO: Implement relocation to other group
12489 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12490 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12491 instances=list(self.instance_names))
12492 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12494 ial.Run(self.op.iallocator)
12496 if not ial.success:
12497 raise errors.OpPrereqError("Can't compute node evacuation using"
12498 " iallocator '%s': %s" %
12499 (self.op.iallocator, ial.info),
12500 errors.ECODE_NORES)
12502 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12504 elif self.op.remote_node is not None:
12505 assert self.op.mode == constants.NODE_EVAC_SEC
12507 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12508 remote_node=self.op.remote_node,
12510 mode=constants.REPLACE_DISK_CHG,
12511 early_release=self.op.early_release)]
12512 for instance_name in self.instance_names]
12515 raise errors.ProgrammerError("No iallocator or remote node")
12517 return ResultWithJobs(jobs)
12520 def _SetOpEarlyRelease(early_release, op):
12521 """Sets C{early_release} flag on opcodes if available.
12525 op.early_release = early_release
12526 except AttributeError:
12527 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12532 def _NodeEvacDest(use_nodes, group, nodes):
12533 """Returns group or nodes depending on caller's choice.
12537 return utils.CommaJoin(nodes)
12542 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12543 """Unpacks the result of change-group and node-evacuate iallocator requests.
12545 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12546 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12548 @type lu: L{LogicalUnit}
12549 @param lu: Logical unit instance
12550 @type alloc_result: tuple/list
12551 @param alloc_result: Result from iallocator
12552 @type early_release: bool
12553 @param early_release: Whether to release locks early if possible
12554 @type use_nodes: bool
12555 @param use_nodes: Whether to display node names instead of groups
12558 (moved, failed, jobs) = alloc_result
12561 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12562 for (name, reason) in failed)
12563 lu.LogWarning("Unable to evacuate instances %s", failreason)
12564 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12567 lu.LogInfo("Instances to be moved: %s",
12568 utils.CommaJoin("%s (to %s)" %
12569 (name, _NodeEvacDest(use_nodes, group, nodes))
12570 for (name, group, nodes) in moved))
12572 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12573 map(opcodes.OpCode.LoadOpCode, ops))
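# For illustration, alloc_result as produced by the node-evacuate and
# change-group iallocator modes has the shape:
#   ([("inst1", "group1", ["node3"]), ...],  # moved
#    [("inst2", "<failure reason>"), ...],   # failed
#    [[<serialized opcode>, ...], ...])      # jobs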
12577 def _DiskSizeInBytesToMebibytes(lu, size):
12578 """Converts a disk size in bytes to mebibytes.
12580 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12583 (mib, remainder) = divmod(size, 1024 * 1024)
12586 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12587 " to not overwrite existing data (%s bytes will not be"
12588 " wiped)", (1024 * 1024) - remainder)
12594 class LUInstanceGrowDisk(LogicalUnit):
12595 """Grow a disk of an instance.
12598 HPATH = "disk-grow"
12599 HTYPE = constants.HTYPE_INSTANCE
12602 def ExpandNames(self):
12603 self._ExpandAndLockInstance()
12604 self.needed_locks[locking.LEVEL_NODE] = []
12605 self.needed_locks[locking.LEVEL_NODE_RES] = []
12606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12607 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12609 def DeclareLocks(self, level):
12610 if level == locking.LEVEL_NODE:
12611 self._LockInstancesNodes()
12612 elif level == locking.LEVEL_NODE_RES:
12614 self.needed_locks[locking.LEVEL_NODE_RES] = \
12615 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12617 def BuildHooksEnv(self):
12618 """Build hooks env.
12620 This runs on the master, the primary and all the secondaries.
12624 "DISK": self.op.disk,
12625 "AMOUNT": self.op.amount,
12626 "ABSOLUTE": self.op.absolute,
12628 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12631 def BuildHooksNodes(self):
12632 """Build hooks nodes.
12635 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12638 def CheckPrereq(self):
12639 """Check prerequisites.
12641 This checks that the instance is in the cluster.
12644 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12645 assert instance is not None, \
12646 "Cannot retrieve locked instance %s" % self.op.instance_name
12647 nodenames = list(instance.all_nodes)
12648 for node in nodenames:
12649 _CheckNodeOnline(self, node)
12651 self.instance = instance
12653 if instance.disk_template not in constants.DTS_GROWABLE:
12654 raise errors.OpPrereqError("Instance's disk layout does not support"
12655 " growing", errors.ECODE_INVAL)
12657 self.disk = instance.FindDisk(self.op.disk)
12659 if self.op.absolute:
12660 self.target = self.op.amount
12661 self.delta = self.target - self.disk.size
12663 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12664 "current disk size (%s)" %
12665 (utils.FormatUnit(self.target, "h"),
12666 utils.FormatUnit(self.disk.size, "h")),
12667 errors.ECODE_STATE)
12669 self.delta = self.op.amount
12670 self.target = self.disk.size + self.delta
12672 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12673 utils.FormatUnit(self.delta, "h"),
12674 errors.ECODE_INVAL)
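# Example: growing a 10240 MiB disk with absolute=True and amount=15360
# yields target=15360 and delta=5120; the same growth in relative mode is
# requested as amount=5120, giving the same target.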
12676 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12678 def _CheckDiskSpace(self, nodenames, req_vgspace):
12679 template = self.instance.disk_template
12680 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12681 # TODO: check the free disk space for file, when that feature will be implemented
12683 nodes = map(self.cfg.GetNodeInfo, nodenames)
12684 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12687 # With exclusive storage we need to do something smarter than just looking
12688 # at free space; for now, let's simply abort the operation.
12689 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12690 " is enabled", errors.ECODE_STATE)
12691 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
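
  # Summary of the ordering above: every node first gets a dry-run grow,
  # then the backing storage is grown everywhere, and only afterwards the
  # logical (e.g. DRBD) device on the primary node, so early failures leave
  # the disk unchanged.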


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; ignored if C{None}

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
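
# Illustrative example: PrepareContainerMods([(constants.DDM_ADD, -1, {})],
# _InstNicModPrivate) yields [(constants.DDM_ADD, -1, {}, <private object>)],
# i.e. every 3-tuple is extended with a freshly constructed private object
# (or None when no private_fn is given).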


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
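
# Illustrative semantics (with all callbacks None, as in the prereq-time dry
# runs of LUInstanceSetParams): for container = ["a", "b"], the mod
# (constants.DDM_ADD, -1, "c", None) appends "c", (constants.DDM_ADD, 1,
# "c", None) inserts before "b", and (constants.DDM_REMOVE, -1, None, None)
# drops the last item.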


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
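
# For example, _UpdateIvNames(0, disks) renames the volumes to "disk/0",
# "disk/1", ... in list order, keeping iv_names consistent after disks have
# been added or removed.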


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
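
  # Illustrative upgrade of the legacy 2-tuple format: [(constants.DDM_ADD,
  # {...})] becomes [(constants.DDM_ADD, -1, {...})], while [(0, {...})]
  # becomes [(constants.DDM_MODIFY, 0, {...})].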

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If a network is given, mode or link"
                                     " should not be given",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
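
  # Illustrative example: a NIC add with {"ip": "pool", "network": "net1"}
  # passes verification (the MAC defaults to constants.VALUE_AUTO), while
  # {"ip": "pool"} alone is rejected because no network is given.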

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
                            "hypervisor", "instance", "cluster")

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in shared mode to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):

    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the nic-network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        # if IP is pool then require a network and generate one IP
        if new_ip.lower() == constants.NIC_IP_POOL:
          try:
            new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("Unable to get a free IP"
                                       " from the address pool",
                                       errors.ECODE_STATE)
          self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
          params[constants.INIC_IP] = new_ip
        elif new_ip != old_ip or new_net != old_net:
          if new_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
          elif new_ip.lower() == constants.NIC_IP_POOL:
            raise errors.OpPrereqError("ip=pool, but no network found",
                                       errors.ECODE_INVAL)
          # the new network is None, so check for IP conflicts on the node
          elif self.op.conflicts_check:
            _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if the old network is set
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
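
  # Only the conversions listed in _DISK_CONVERSIONS at the bottom of this
  # class (plain <-> drbd) are supported, and converting to a mirrored
  # template additionally requires remote_node as the new secondary.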

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (instance.disk_template,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
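
  # Illustrative example: raising BE_MAXMEM above the node group's ipolicy
  # maximum makes res_max non-empty, so the operation fails with
  # ECODE_INVAL unless the opcode sets ignore_ipolicy.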

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
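
  # In short: the new DRBD disk trees are generated first, the original LVs
  # are renamed into the data-volume slots, and only then are the DRBD
  # devices created on both nodes, so the instance data is preserved.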

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance.name,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
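
  # Illustrative result shape: {"node1": ["export1", "export2"],
  # "node2": False}, where False marks a node whose export list could not
  # be retrieved.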
14406 class _ExportQuery(_QueryBase):
14407 FIELDS = query.EXPORT_FIELDS
14409 #: The node name is not a unique key for this query
14410 SORT_FIELD = "node"
14412 def ExpandNames(self, lu):
14413 lu.needed_locks = {}
14415 # The following variables interact with _QueryBase._GetNames
14416 if self.names:
14417 self.wanted = _GetWantedNodes(lu, self.names)
14418 else:
14419 self.wanted = locking.ALL_SET
14421 self.do_locking = self.use_locking
14423 if self.do_locking:
14424 lu.share_locks = _ShareAll()
14425 lu.needed_locks = {
14426 locking.LEVEL_NODE: self.wanted,
14427 }
14429 if not self.names:
14430 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14432 def DeclareLocks(self, lu, level):
14433 pass
14435 def _GetQueryData(self, lu):
14436 """Computes the list of nodes and their attributes.
14439 # Locking is not used
14441 assert not (compat.any(lu.glm.is_owned(level)
14442 for level in locking.LEVELS
14443 if level != locking.LEVEL_CLUSTER) or
14444 self.do_locking or self.use_locking)
14446 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14448 result = []
14450 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14451 if nres.fail_msg:
14452 result.append((node, None))
14453 else:
14454 result.extend((node, expname) for expname in nres.payload)
14456 return result
14459 class LUBackupPrepare(NoHooksLU):
14460 """Prepares an instance for an export and returns useful information.
14465 def ExpandNames(self):
14466 self._ExpandAndLockInstance()
14468 def CheckPrereq(self):
14469 """Check prerequisites.
14472 instance_name = self.op.instance_name
14474 self.instance = self.cfg.GetInstanceInfo(instance_name)
14475 assert self.instance is not None, \
14476 "Cannot retrieve locked instance %s" % self.op.instance_name
14477 _CheckNodeOnline(self, self.instance.primary_node)
14479 self._cds = _GetClusterDomainSecret()
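# The cluster domain secret obtained here is used in Exec to authenticate
# everything handed to the destination cluster: the handshake message, the
# HMAC-signed X509 key name and the signed CA.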
14481 def Exec(self, feedback_fn):
14482 """Prepares an instance for an export.
14485 instance = self.instance
14487 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14488 salt = utils.GenerateSecret(8)
14490 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14491 result = self.rpc.call_x509_cert_create(instance.primary_node,
14492 constants.RIE_CERT_VALIDITY)
14493 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14495 (name, cert_pem) = result.payload
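# The RPC result carries the new key's name and the certificate in PEM
# format; the private key itself is created and kept on the node.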
14497 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14498 cert_pem)
14500 return {
14501 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14502 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14503 salt),
14504 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14505 }
14507 return None
14510 class LUBackupExport(LogicalUnit):
14511 """Export an instance to an image in the cluster.
14514 HPATH = "instance-export"
14515 HTYPE = constants.HTYPE_INSTANCE
14518 def CheckArguments(self):
14519 """Check the arguments.
14522 self.x509_key_name = self.op.x509_key_name
14523 self.dest_x509_ca_pem = self.op.destination_x509_ca
14525 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14526 if not self.x509_key_name:
14527 raise errors.OpPrereqError("Missing X509 key name for encryption",
14528 errors.ECODE_INVAL)
14530 if not self.dest_x509_ca_pem:
14531 raise errors.OpPrereqError("Missing destination X509 CA",
14532 errors.ECODE_INVAL)
14534 def ExpandNames(self):
14535 self._ExpandAndLockInstance()
14537 # Lock all nodes for local exports
14538 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14539 # FIXME: lock only instance primary and destination node
14540 #
14541 # Sad but true, for now we have to lock all nodes, as we don't know where
14542 # the previous export might be, and in this LU we search for it and
14543 # remove it from its current node. In the future we could fix this by:
14544 # - making a tasklet to search (share-lock all), then create the
14545 # new one, then one to remove, after
14546 # - removing the removal operation altogether
14547 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14549 # Allocations should be stopped while this LU runs with node locks, but
14550 # it doesn't have to be exclusive
14551 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14552 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14554 def DeclareLocks(self, level):
14555 """Last minute lock declaration."""
14556 # All nodes are locked anyway, so nothing to do here.
14558 def BuildHooksEnv(self):
14559 """Build hooks env.
14561 This will run on the master, primary node and target node.
14563 """
14564 env = {
14565 "EXPORT_MODE": self.op.mode,
14566 "EXPORT_NODE": self.op.target_node,
14567 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14568 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14569 # TODO: Generic function for boolean env variables
14570 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14571 }
14573 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14575 return env
14577 def BuildHooksNodes(self):
14578 """Build hooks nodes.
14581 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14583 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14584 nl.append(self.op.target_node)
14586 return (nl, nl)
14588 def CheckPrereq(self):
14589 """Check prerequisites.
14591 This checks that the instance and node names are valid.
14594 instance_name = self.op.instance_name
14596 self.instance = self.cfg.GetInstanceInfo(instance_name)
14597 assert self.instance is not None, \
14598 "Cannot retrieve locked instance %s" % self.op.instance_name
14599 _CheckNodeOnline(self, self.instance.primary_node)
14601 if (self.op.remove_instance and
14602 self.instance.admin_state == constants.ADMINST_UP and
14603 not self.op.shutdown):
14604 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14605 " down first", errors.ECODE_STATE)
14607 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14608 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14609 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14610 assert self.dst_node is not None
14612 _CheckNodeOnline(self, self.dst_node.name)
14613 _CheckNodeNotDrained(self, self.dst_node.name)
14615 self._cds = None
14616 self.dest_disk_info = None
14617 self.dest_x509_ca = None
14619 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14620 self.dst_node = None
14622 if len(self.op.target_node) != len(self.instance.disks):
14623 raise errors.OpPrereqError(("Received destination information for %s"
14624 " disks, but instance %s has %s disks") %
14625 (len(self.op.target_node), instance_name,
14626 len(self.instance.disks)),
14627 errors.ECODE_INVAL)
14629 cds = _GetClusterDomainSecret()
14631 # Check X509 key name
14632 try:
14633 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14634 except (TypeError, ValueError), err:
14635 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14636 errors.ECODE_INVAL)
14638 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14639 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14640 errors.ECODE_INVAL)
14642 # Load and verify CA
14643 try:
14644 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14645 except OpenSSL.crypto.Error, err:
14646 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14647 (err, ), errors.ECODE_INVAL)
14649 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14650 if errcode is not None:
14651 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14652 (msg, ), errors.ECODE_INVAL)
14654 self.dest_x509_ca = cert
14656 # Verify target information
14657 disk_info = []
14658 for idx, disk_data in enumerate(self.op.target_node):
14659 try:
14660 (host, port, magic) = \
14661 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14662 except errors.GenericError, err:
14663 raise errors.OpPrereqError("Target info for disk %s: %s" %
14664 (idx, err), errors.ECODE_INVAL)
14666 disk_info.append((host, port, magic))
14668 assert len(disk_info) == len(self.op.target_node)
14669 self.dest_disk_info = disk_info
14671 else:
14672 raise errors.ProgrammerError("Unhandled export mode %r" %
14673 self.op.mode)
14675 # instance disk type verification
14676 # TODO: Implement export support for file-based disks
14677 for disk in self.instance.disks:
14678 if disk.dev_type == constants.LD_FILE:
14679 raise errors.OpPrereqError("Export not supported for instances with"
14680 " file-based disks", errors.ECODE_INVAL)
14682 def _CleanupExports(self, feedback_fn):
14683 """Removes exports of current instance from all other nodes.
14685 If an instance in a cluster with nodes A..D was exported to node C, its
14686 exports will be removed from the nodes A, B and D.
14689 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14691 nodelist = self.cfg.GetNodeList()
14692 nodelist.remove(self.dst_node.name)
14694 # on one-node clusters nodelist will be empty after the removal
14695 # if we proceeded, the backup would be removed because OpBackupQuery
14696 # substitutes an empty list with the full cluster node list.
14697 iname = self.instance.name
14698 if nodelist:
14699 feedback_fn("Removing old exports for instance %s" % iname)
14700 exportlist = self.rpc.call_export_list(nodelist)
14701 for node in exportlist:
14702 if exportlist[node].fail_msg:
14703 continue
14704 if iname in exportlist[node].payload:
14705 msg = self.rpc.call_export_remove(node, iname).fail_msg
14706 if msg:
14707 self.LogWarning("Could not remove older export for instance %s"
14708 " on node %s: %s", iname, node, msg)
14710 def Exec(self, feedback_fn):
14711 """Export an instance to an image in the cluster.
14714 assert self.op.mode in constants.EXPORT_MODES
14716 instance = self.instance
14717 src_node = instance.primary_node
14719 if self.op.shutdown:
14720 # shutdown the instance, but not the disks
14721 feedback_fn("Shutting down instance %s" % instance.name)
14722 result = self.rpc.call_instance_shutdown(src_node, instance,
14723 self.op.shutdown_timeout)
14724 # TODO: Maybe ignore failures if ignore_remove_failures is set
14725 result.Raise("Could not shutdown instance %s on"
14726 " node %s" % (instance.name, src_node))
14728 # set the disks ID correctly since call_instance_start needs the
14729 # correct drbd minor to create the symlinks
14730 for disk in instance.disks:
14731 self.cfg.SetDiskID(disk, src_node)
14733 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14735 if activate_disks:
14736 # Activate the instance disks if we're exporting a stopped instance
14737 feedback_fn("Activating disks for %s" % instance.name)
14738 _StartInstanceDisks(self, instance, None)
14740 try:
14741 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14742 instance)
14744 helper.CreateSnapshots()
14745 try:
14746 if (self.op.shutdown and
14747 instance.admin_state == constants.ADMINST_UP and
14748 not self.op.remove_instance):
14749 assert not activate_disks
14750 feedback_fn("Starting instance %s" % instance.name)
14751 result = self.rpc.call_instance_start(src_node,
14752 (instance, None, None), False)
14753 msg = result.fail_msg
14754 if msg:
14755 feedback_fn("Failed to start instance: %s" % msg)
14756 _ShutdownInstanceDisks(self, instance)
14757 raise errors.OpExecError("Could not start instance: %s" % msg)
14759 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14760 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14761 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14762 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14763 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
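# Presumably only the connection timeout needs tuning here; the other
# import/export timeouts keep the ImportExportTimeouts defaults.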
14765 (key_name, _, _) = self.x509_key_name
14767 dest_ca_pem = \
14768 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14769 self.dest_x509_ca)
14771 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14772 key_name, dest_ca_pem,
14773 timeouts)
14774 finally:
14775 helper.Cleanup()
14777 # Check for backwards compatibility
14778 assert len(dresults) == len(instance.disks)
14779 assert compat.all(isinstance(i, bool) for i in dresults), \
14780 "Not all results are boolean: %r" % dresults
14782 finally:
14783 if activate_disks:
14784 feedback_fn("Deactivating disks for %s" % instance.name)
14785 _ShutdownInstanceDisks(self, instance)
14787 if not (compat.all(dresults) and fin_resu):
14788 failures = []
14789 if not fin_resu:
14790 failures.append("export finalization")
14791 if not compat.all(dresults):
14792 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14793 if not dsk)
14794 failures.append("disk export: disk(s) %s" % fdsk)
14796 raise errors.OpExecError("Export failed, errors in %s" %
14797 utils.CommaJoin(failures))
14799 # At this point, the export was successful, we can cleanup/finish
14801 # Remove instance if requested
14802 if self.op.remove_instance:
14803 feedback_fn("Removing instance %s" % instance.name)
14804 _RemoveInstance(self, feedback_fn, instance,
14805 self.op.ignore_remove_failures)
14807 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14808 self._CleanupExports(feedback_fn)
14810 return fin_resu, dresults
14813 class LUBackupRemove(NoHooksLU):
14814 """Remove exports related to the named instance.
14819 def ExpandNames(self):
14820 self.needed_locks = {
14821 # We need all nodes to be locked in order for RemoveExport to work, but
14822 # we don't need to lock the instance itself, as nothing will happen to it
14823 # (and we can remove exports also for a removed instance)
14824 locking.LEVEL_NODE: locking.ALL_SET,
14826 # Removing backups is quick, so blocking allocations is justified
14827 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14830 # Allocations should be stopped while this LU runs with node locks, but it
14831 # doesn't have to be exclusive
14832 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14834 def Exec(self, feedback_fn):
14835 """Remove any export.
14838 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14839 # If the instance was not found we'll try with the name that was passed in.
14840 # This will only work if it was an FQDN, though.
14841 fqdn_warn = False
14842 if not instance_name:
14843 fqdn_warn = True
14844 instance_name = self.op.instance_name
14846 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14847 exportlist = self.rpc.call_export_list(locked_nodes)
14848 found = False
14849 for node in exportlist:
14850 msg = exportlist[node].fail_msg
14851 if msg:
14852 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14853 continue
14854 if instance_name in exportlist[node].payload:
14855 found = True
14856 result = self.rpc.call_export_remove(node, instance_name)
14857 msg = result.fail_msg
14858 if msg:
14859 logging.error("Could not remove export for instance %s"
14860 " on node %s: %s", instance_name, node, msg)
14862 if fqdn_warn and not found:
14863 feedback_fn("Export not found. If trying to remove an export belonging"
14864 " to a deleted instance please use its Fully Qualified"
14865 " Name.")
14868 class LUGroupAdd(LogicalUnit):
14869 """Logical unit for creating node groups.
14872 HPATH = "group-add"
14873 HTYPE = constants.HTYPE_GROUP
14876 def ExpandNames(self):
14877 # We need the new group's UUID here so that we can create and acquire the
14878 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14879 # that it should not check whether the UUID exists in the configuration.
14880 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14881 self.needed_locks = {}
14882 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14884 def CheckPrereq(self):
14885 """Check prerequisites.
14887 This checks that the given group name is not an existing node group
14888 already.
14890 """
14891 try:
14892 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14893 except errors.OpPrereqError:
14894 pass
14895 else:
14896 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14897 " node group (UUID: %s)" %
14898 (self.op.group_name, existing_uuid),
14899 errors.ECODE_EXISTS)
14901 if self.op.ndparams:
14902 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14904 if self.op.hv_state:
14905 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14906 else:
14907 self.new_hv_state = None
14909 if self.op.disk_state:
14910 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14911 else:
14912 self.new_disk_state = None
14914 if self.op.diskparams:
14915 for templ in constants.DISK_TEMPLATES:
14916 if templ in self.op.diskparams:
14917 utils.ForceDictType(self.op.diskparams[templ],
14918 constants.DISK_DT_TYPES)
14919 self.new_diskparams = self.op.diskparams
14920 try:
14921 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14922 except errors.OpPrereqError, err:
14923 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14924 errors.ECODE_INVAL)
14925 else:
14926 self.new_diskparams = {}
14928 if self.op.ipolicy:
14929 cluster = self.cfg.GetClusterInfo()
14930 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14931 try:
14932 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14933 except errors.ConfigurationError, err:
14934 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14935 errors.ECODE_INVAL)
14937 def BuildHooksEnv(self):
14938 """Build hooks env.
14940 """
14941 return {
14942 "GROUP_NAME": self.op.group_name,
14943 }
14945 def BuildHooksNodes(self):
14946 """Build hooks nodes.
14949 mn = self.cfg.GetMasterNode()
14950 return ([mn], [mn])
14952 def Exec(self, feedback_fn):
14953 """Add the node group to the cluster.
14956 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14957 uuid=self.group_uuid,
14958 alloc_policy=self.op.alloc_policy,
14959 ndparams=self.op.ndparams,
14960 diskparams=self.new_diskparams,
14961 ipolicy=self.op.ipolicy,
14962 hv_state_static=self.new_hv_state,
14963 disk_state_static=self.new_disk_state)
14965 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14966 del self.remove_locks[locking.LEVEL_NODEGROUP]
14969 class LUGroupAssignNodes(NoHooksLU):
14970 """Logical unit for assigning nodes to groups.
14975 def ExpandNames(self):
14976 # These raise errors.OpPrereqError on their own:
14977 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14978 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14980 # We want to lock all the affected nodes and groups. We have readily
14981 # available the list of nodes, and the *destination* group. To gather the
14982 # list of "source" groups, we need to fetch node information later on.
14983 self.needed_locks = {
14984 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14985 locking.LEVEL_NODE: self.op.nodes,
14988 def DeclareLocks(self, level):
14989 if level == locking.LEVEL_NODEGROUP:
14990 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14992 # Try to get all affected nodes' groups without having the group or node
14993 # lock yet. Needs verification later in the code flow.
14994 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14996 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14998 def CheckPrereq(self):
14999 """Check prerequisites.
15002 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15003 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15004 frozenset(self.op.nodes))
15006 expected_locks = (set([self.group_uuid]) |
15007 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15008 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15009 if actual_locks != expected_locks:
15010 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15011 " current groups are '%s', used to be '%s'" %
15012 (utils.CommaJoin(expected_locks),
15013 utils.CommaJoin(actual_locks)))
15015 self.node_data = self.cfg.GetAllNodesInfo()
15016 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15017 instance_data = self.cfg.GetAllInstancesInfo()
15019 if self.group is None:
15020 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15021 (self.op.group_name, self.group_uuid))
15023 (new_splits, previous_splits) = \
15024 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15025 for node in self.op.nodes],
15026 self.node_data, instance_data)
15028 if new_splits:
15029 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15031 if not self.op.force:
15032 raise errors.OpExecError("The following instances get split by this"
15033 " change and --force was not given: %s" %
15034 fmt_new_splits)
15035 else:
15036 self.LogWarning("This operation will split the following instances: %s",
15037 fmt_new_splits)
15039 if previous_splits:
15040 self.LogWarning("In addition, these already-split instances continue"
15041 " to be split across groups: %s",
15042 utils.CommaJoin(utils.NiceSort(previous_splits)))
15044 def Exec(self, feedback_fn):
15045 """Assign nodes to a new group.
15048 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
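# Each entry is a (node_name, target_group_uuid) pair; the configuration
# applies the whole assignment in one step.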
15050 self.cfg.AssignGroupNodes(mods)
15052 @staticmethod
15053 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15054 """Check for split instances after a node assignment.
15056 This method considers a series of node assignments as an atomic operation,
15057 and returns information about split instances after applying the set of
15058 changes.
15060 In particular, it returns information about newly split instances, and
15061 instances that were already split, and remain so after the change.
15063 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15064 considered.
15066 @type changes: list of (node_name, new_group_uuid) pairs.
15067 @param changes: list of node assignments to consider.
15068 @param node_data: a dict with data for all nodes
15069 @param instance_data: a dict with all instances to consider
15070 @rtype: a two-tuple
15071 @return: a list of instances that were previously okay and result split as a
15072 consequence of this change, and a list of instances that were previously
15073 split and this change does not fix.
15076 changed_nodes = dict((node, group) for node, group in changes
15077 if node_data[node].group != group)
15079 all_split_instances = set()
15080 previously_split_instances = set()
15082 def InstanceNodes(instance):
15083 return [instance.primary_node] + list(instance.secondary_nodes)
15085 for inst in instance_data.values():
15086 if inst.disk_template not in constants.DTS_INT_MIRROR:
15087 continue
15089 instance_nodes = InstanceNodes(inst)
15091 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15092 previously_split_instances.add(inst.name)
15094 if len(set(changed_nodes.get(node, node_data[node].group)
15095 for node in instance_nodes)) > 1:
15096 all_split_instances.add(inst.name)
15098 return (list(all_split_instances - previously_split_instances),
15099 list(previously_split_instances & all_split_instances))
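# A hypothetical example of the computation above: with nodes "n1" and "n2"
# both in group "g1" and a DRBD instance on (n1, n2), the change
# [("n2", "g2")] makes the instance's node set span two groups, so it is
# returned in the first (newly split) list; an instance whose nodes already
# straddled two groups, and still do, ends up in the second list instead.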
15102 class _GroupQuery(_QueryBase):
15103 FIELDS = query.GROUP_FIELDS
15105 def ExpandNames(self, lu):
15106 lu.needed_locks = {}
15108 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15109 self._cluster = lu.cfg.GetClusterInfo()
15110 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15112 if not self.names:
15113 self.wanted = [name_to_uuid[name]
15114 for name in utils.NiceSort(name_to_uuid.keys())]
15115 else:
15116 # Accept names to be either names or UUIDs.
15117 missing = []
15118 self.wanted = []
15119 all_uuid = frozenset(self._all_groups.keys())
15121 for name in self.names:
15122 if name in all_uuid:
15123 self.wanted.append(name)
15124 elif name in name_to_uuid:
15125 self.wanted.append(name_to_uuid[name])
15126 else:
15127 missing.append(name)
15129 if missing:
15130 raise errors.OpPrereqError("Some groups do not exist: %s" %
15131 utils.CommaJoin(missing),
15132 errors.ECODE_NOENT)
15134 def DeclareLocks(self, lu, level):
15135 pass
15137 def _GetQueryData(self, lu):
15138 """Computes the list of node groups and their attributes.
15141 do_nodes = query.GQ_NODE in self.requested_data
15142 do_instances = query.GQ_INST in self.requested_data
15144 group_to_nodes = None
15145 group_to_instances = None
15147 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15148 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15149 # latter GetAllInstancesInfo() is not enough, for we have to go through
15150 # instance->node. Hence, we will need to process nodes even if we only need
15151 # instance information.
15152 if do_nodes or do_instances:
15153 all_nodes = lu.cfg.GetAllNodesInfo()
15154 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15155 node_to_group = {}
15157 for node in all_nodes.values():
15158 if node.group in group_to_nodes:
15159 group_to_nodes[node.group].append(node.name)
15160 node_to_group[node.name] = node.group
15162 if do_instances:
15163 all_instances = lu.cfg.GetAllInstancesInfo()
15164 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15166 for instance in all_instances.values():
15167 node = instance.primary_node
15168 if node in node_to_group:
15169 group_to_instances[node_to_group[node]].append(instance.name)
15171 if not do_nodes:
15172 # Do not pass on node information if it was not requested.
15173 group_to_nodes = None
15175 return query.GroupQueryData(self._cluster,
15176 [self._all_groups[uuid]
15177 for uuid in self.wanted],
15178 group_to_nodes, group_to_instances,
15179 query.GQ_DISKPARAMS in self.requested_data)
15182 class LUGroupQuery(NoHooksLU):
15183 """Logical unit for querying node groups.
15188 def CheckArguments(self):
15189 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15190 self.op.output_fields, False)
15192 def ExpandNames(self):
15193 self.gq.ExpandNames(self)
15195 def DeclareLocks(self, level):
15196 self.gq.DeclareLocks(self, level)
15198 def Exec(self, feedback_fn):
15199 return self.gq.OldStyleQuery(self)
15202 class LUGroupSetParams(LogicalUnit):
15203 """Modifies the parameters of a node group.
15206 HPATH = "group-modify"
15207 HTYPE = constants.HTYPE_GROUP
15210 def CheckArguments(self):
15211 all_changes = [
15212 self.op.ndparams,
15213 self.op.diskparams,
15214 self.op.alloc_policy,
15215 self.op.hv_state,
15216 self.op.disk_state,
15217 self.op.ipolicy,
15218 ]
15220 if all_changes.count(None) == len(all_changes):
15221 raise errors.OpPrereqError("Please pass at least one modification",
15222 errors.ECODE_INVAL)
15224 def ExpandNames(self):
15225 # This raises errors.OpPrereqError on its own:
15226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15228 self.needed_locks = {
15229 locking.LEVEL_INSTANCE: [],
15230 locking.LEVEL_NODEGROUP: [self.group_uuid],
15233 self.share_locks[locking.LEVEL_INSTANCE] = 1
15235 def DeclareLocks(self, level):
15236 if level == locking.LEVEL_INSTANCE:
15237 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15239 # Lock instances optimistically, needs verification once group lock has
15240 # been acquired
15241 self.needed_locks[locking.LEVEL_INSTANCE] = \
15242 self.cfg.GetNodeGroupInstances(self.group_uuid)
15244 @staticmethod
15245 def _UpdateAndVerifyDiskParams(old, new):
15246 """Updates and verifies disk parameters.
15249 new_params = _GetUpdatedParams(old, new)
15250 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15251 return new_params
15253 def CheckPrereq(self):
15254 """Check prerequisites.
15257 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15259 # Check if locked instances are still correct
15260 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15262 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15263 cluster = self.cfg.GetClusterInfo()
15265 if self.group is None:
15266 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15267 (self.op.group_name, self.group_uuid))
15269 if self.op.ndparams:
15270 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15271 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15272 self.new_ndparams = new_ndparams
15274 if self.op.diskparams:
15275 diskparams = self.group.diskparams
15276 uavdp = self._UpdateAndVerifyDiskParams
15277 # For each disktemplate subdict update and verify the values
15278 new_diskparams = dict((dt,
15279 uavdp(diskparams.get(dt, {}),
15280 self.op.diskparams[dt]))
15281 for dt in constants.DISK_TEMPLATES
15282 if dt in self.op.diskparams)
15283 # As we have all subdicts of diskparams ready, let's merge the actual
15284 # dict with all updated subdicts
15285 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15286 try:
15287 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15288 except errors.OpPrereqError, err:
15289 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15290 errors.ECODE_INVAL)
15292 if self.op.hv_state:
15293 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15294 self.group.hv_state_static)
15296 if self.op.disk_state:
15297 self.new_disk_state = \
15298 _MergeAndVerifyDiskState(self.op.disk_state,
15299 self.group.disk_state_static)
15301 if self.op.ipolicy:
15302 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15303 self.op.ipolicy,
15304 group_policy=True)
15306 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15307 inst_filter = lambda inst: inst.name in owned_instances
15308 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15309 gmi = ganeti.masterd.instance
15310 violations = \
15311 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15312 self.group),
15313 new_ipolicy, instances)
15315 if violations:
15316 self.LogWarning("After the ipolicy change the following instances"
15317 " violate them: %s",
15318 utils.CommaJoin(violations))
15320 def BuildHooksEnv(self):
15321 """Build hooks env.
15323 """
15324 return {
15325 "GROUP_NAME": self.op.group_name,
15326 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15327 }
15329 def BuildHooksNodes(self):
15330 """Build hooks nodes.
15333 mn = self.cfg.GetMasterNode()
15334 return ([mn], [mn])
15336 def Exec(self, feedback_fn):
15337 """Modifies the node group.
15339 """
15340 result = []
15342 if self.op.ndparams:
15343 self.group.ndparams = self.new_ndparams
15344 result.append(("ndparams", str(self.group.ndparams)))
15346 if self.op.diskparams:
15347 self.group.diskparams = self.new_diskparams
15348 result.append(("diskparams", str(self.group.diskparams)))
15350 if self.op.alloc_policy:
15351 self.group.alloc_policy = self.op.alloc_policy
15353 if self.op.hv_state:
15354 self.group.hv_state_static = self.new_hv_state
15356 if self.op.disk_state:
15357 self.group.disk_state_static = self.new_disk_state
15359 if self.op.ipolicy:
15360 self.group.ipolicy = self.new_ipolicy
15362 self.cfg.Update(self.group, feedback_fn)
15364 return result
15366 class LUGroupRemove(LogicalUnit):
15367 HPATH = "group-remove"
15368 HTYPE = constants.HTYPE_GROUP
15371 def ExpandNames(self):
15372 # This raises errors.OpPrereqError on its own:
15373 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15374 self.needed_locks = {
15375 locking.LEVEL_NODEGROUP: [self.group_uuid],
15378 def CheckPrereq(self):
15379 """Check prerequisites.
15381 This checks that the given group name exists as a node group, that it is
15382 empty (i.e., contains no nodes), and that it is not the last group of the
15383 cluster.
15385 """
15386 # Verify that the group is empty.
15387 group_nodes = [node.name
15388 for node in self.cfg.GetAllNodesInfo().values()
15389 if node.group == self.group_uuid]
15391 if group_nodes:
15392 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15393 " nodes: %s" %
15394 (self.op.group_name,
15395 utils.CommaJoin(utils.NiceSort(group_nodes))),
15396 errors.ECODE_STATE)
15398 # Verify the cluster would not be left group-less.
15399 if len(self.cfg.GetNodeGroupList()) == 1:
15400 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15401 " removed" % self.op.group_name,
15402 errors.ECODE_STATE)
15404 def BuildHooksEnv(self):
15405 """Build hooks env.
15407 """
15408 return {
15409 "GROUP_NAME": self.op.group_name,
15410 }
15412 def BuildHooksNodes(self):
15413 """Build hooks nodes.
15416 mn = self.cfg.GetMasterNode()
15417 return ([mn], [mn])
15419 def Exec(self, feedback_fn):
15420 """Remove the node group.
15422 """
15423 try:
15424 self.cfg.RemoveNodeGroup(self.group_uuid)
15425 except errors.ConfigurationError:
15426 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15427 (self.op.group_name, self.group_uuid))
15429 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15432 class LUGroupRename(LogicalUnit):
15433 HPATH = "group-rename"
15434 HTYPE = constants.HTYPE_GROUP
15437 def ExpandNames(self):
15438 # This raises errors.OpPrereqError on its own:
15439 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15441 self.needed_locks = {
15442 locking.LEVEL_NODEGROUP: [self.group_uuid],
15445 def CheckPrereq(self):
15446 """Check prerequisites.
15448 Ensures requested new name is not yet used.
15450 """
15451 try:
15452 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15453 except errors.OpPrereqError:
15454 pass
15455 else:
15456 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15457 " node group (UUID: %s)" %
15458 (self.op.new_name, new_name_uuid),
15459 errors.ECODE_EXISTS)
15461 def BuildHooksEnv(self):
15462 """Build hooks env.
15464 """
15465 return {
15466 "OLD_NAME": self.op.group_name,
15467 "NEW_NAME": self.op.new_name,
15468 }
15470 def BuildHooksNodes(self):
15471 """Build hooks nodes.
15474 mn = self.cfg.GetMasterNode()
15476 all_nodes = self.cfg.GetAllNodesInfo()
15477 all_nodes.pop(mn, None)
15479 run_nodes = [mn]
15480 run_nodes.extend(node.name for node in all_nodes.values()
15481 if node.group == self.group_uuid)
15483 return (run_nodes, run_nodes)
15485 def Exec(self, feedback_fn):
15486 """Rename the node group.
15489 group = self.cfg.GetNodeGroup(self.group_uuid)
15491 if group is None:
15492 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15493 (self.op.group_name, self.group_uuid))
15495 group.name = self.op.new_name
15496 self.cfg.Update(group, feedback_fn)
15498 return self.op.new_name
15501 class LUGroupEvacuate(LogicalUnit):
15502 HPATH = "group-evacuate"
15503 HTYPE = constants.HTYPE_GROUP
15506 def ExpandNames(self):
15507 # This raises errors.OpPrereqError on its own:
15508 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15510 if self.op.target_groups:
15511 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15512 self.op.target_groups)
15513 else:
15514 self.req_target_uuids = []
15516 if self.group_uuid in self.req_target_uuids:
15517 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
15518 " as a target group (targets are %s)" %
15519 (self.group_uuid,
15520 utils.CommaJoin(self.req_target_uuids)),
15521 errors.ECODE_INVAL)
15523 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15525 self.share_locks = _ShareAll()
15526 self.needed_locks = {
15527 locking.LEVEL_INSTANCE: [],
15528 locking.LEVEL_NODEGROUP: [],
15529 locking.LEVEL_NODE: [],
15532 def DeclareLocks(self, level):
15533 if level == locking.LEVEL_INSTANCE:
15534 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15536 # Lock instances optimistically, needs verification once node and group
15537 # locks have been acquired
15538 self.needed_locks[locking.LEVEL_INSTANCE] = \
15539 self.cfg.GetNodeGroupInstances(self.group_uuid)
15541 elif level == locking.LEVEL_NODEGROUP:
15542 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15544 if self.req_target_uuids:
15545 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15546 else:
15547 # Lock all groups used by instances optimistically; this requires going
15548 # via the node before it's locked, requiring verification later on
15549 lock_groups.update(group_uuid
15550 for instance_name in
15551 self.owned_locks(locking.LEVEL_INSTANCE)
15552 for group_uuid in
15553 self.cfg.GetInstanceNodeGroups(instance_name))
15554 else:
15555 # No target groups, need to lock all of them
15556 lock_groups = locking.ALL_SET
15558 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15560 elif level == locking.LEVEL_NODE:
15561 # This will only lock the nodes in the group to be evacuated which
15562 # contain actual instances
15563 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15564 self._LockInstancesNodes()
15566 # Lock all nodes in group to be evacuated and target groups
15567 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15568 assert self.group_uuid in owned_groups
15569 member_nodes = [node_name
15570 for group in owned_groups
15571 for node_name in self.cfg.GetNodeGroup(group).members]
15572 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
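# Nodes of the target groups are locked as well: the evacuation jobs
# created in Exec will move instances onto them.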
15574 def CheckPrereq(self):
15575 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15576 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15577 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15579 assert owned_groups.issuperset(self.req_target_uuids)
15580 assert self.group_uuid in owned_groups
15582 # Check if locked instances are still correct
15583 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15585 # Get instance information
15586 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15588 # Check if node groups for locked instances are still correct
15589 _CheckInstancesNodeGroups(self.cfg, self.instances,
15590 owned_groups, owned_nodes, self.group_uuid)
15592 if self.req_target_uuids:
15593 # User requested specific target groups
15594 self.target_uuids = self.req_target_uuids
15595 else:
15596 # All groups except the one to be evacuated are potential targets
15597 self.target_uuids = [group_uuid for group_uuid in owned_groups
15598 if group_uuid != self.group_uuid]
15600 if not self.target_uuids:
15601 raise errors.OpPrereqError("There are no possible target groups",
15602 errors.ECODE_INVAL)
15604 def BuildHooksEnv(self):
15605 """Build hooks env.
15607 """
15608 return {
15609 "GROUP_NAME": self.op.group_name,
15610 "TARGET_GROUPS": " ".join(self.target_uuids),
15611 }
15613 def BuildHooksNodes(self):
15614 """Build hooks nodes.
15617 mn = self.cfg.GetMasterNode()
15619 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15621 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15623 return (run_nodes, run_nodes)
15625 def Exec(self, feedback_fn):
15626 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15628 assert self.group_uuid not in self.target_uuids
15630 req = iallocator.IAReqGroupChange(instances=instances,
15631 target_groups=self.target_uuids)
15632 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15634 ial.Run(self.op.iallocator)
15636 if not ial.success:
15637 raise errors.OpPrereqError("Can't compute group evacuation using"
15638 " iallocator '%s': %s" %
15639 (self.op.iallocator, ial.info),
15640 errors.ECODE_NORES)
15642 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15644 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15645 len(jobs), self.op.group_name)
15647 return ResultWithJobs(jobs)
15650 class TagsLU(NoHooksLU): # pylint: disable=W0223
15651 """Generic tags LU.
15653 This is an abstract class which is the parent of all the other tags LUs.
15656 def ExpandNames(self):
15657 self.group_uuid = None
15658 self.needed_locks = {}
15660 if self.op.kind == constants.TAG_NODE:
15661 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15662 lock_level = locking.LEVEL_NODE
15663 lock_name = self.op.name
15664 elif self.op.kind == constants.TAG_INSTANCE:
15665 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15666 lock_level = locking.LEVEL_INSTANCE
15667 lock_name = self.op.name
15668 elif self.op.kind == constants.TAG_NODEGROUP:
15669 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15670 lock_level = locking.LEVEL_NODEGROUP
15671 lock_name = self.group_uuid
15672 elif self.op.kind == constants.TAG_NETWORK:
15673 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15674 lock_level = locking.LEVEL_NETWORK
15675 lock_name = self.network_uuid
15676 else:
15677 lock_level = None
15678 lock_name = None
15680 if lock_level and getattr(self.op, "use_locking", True):
15681 self.needed_locks[lock_level] = lock_name
15683 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15684 # not possible to acquire the BGL based on opcode parameters)
15686 def CheckPrereq(self):
15687 """Check prerequisites.
15690 if self.op.kind == constants.TAG_CLUSTER:
15691 self.target = self.cfg.GetClusterInfo()
15692 elif self.op.kind == constants.TAG_NODE:
15693 self.target = self.cfg.GetNodeInfo(self.op.name)
15694 elif self.op.kind == constants.TAG_INSTANCE:
15695 self.target = self.cfg.GetInstanceInfo(self.op.name)
15696 elif self.op.kind == constants.TAG_NODEGROUP:
15697 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15698 elif self.op.kind == constants.TAG_NETWORK:
15699 self.target = self.cfg.GetNetwork(self.network_uuid)
15700 else:
15701 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15702 str(self.op.kind), errors.ECODE_INVAL)
15705 class LUTagsGet(TagsLU):
15706 """Returns the tags of a given object.
15711 def ExpandNames(self):
15712 TagsLU.ExpandNames(self)
15714 # Share locks as this is only a read operation
15715 self.share_locks = _ShareAll()
15717 def Exec(self, feedback_fn):
15718 """Returns the tag list.
15721 return list(self.target.GetTags())
15724 class LUTagsSearch(NoHooksLU):
15725 """Searches the tags for a given pattern.
15730 def ExpandNames(self):
15731 self.needed_locks = {}
15733 def CheckPrereq(self):
15734 """Check prerequisites.
15736 This checks the pattern passed for validity by compiling it.
15738 """
15739 try:
15740 self.re = re.compile(self.op.pattern)
15741 except re.error, err:
15742 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15743 (self.op.pattern, err), errors.ECODE_INVAL)
15745 def Exec(self, feedback_fn):
15746 """Returns the tag list.
15748 """
15749 cfg = self.cfg
15750 tgts = [("/cluster", cfg.GetClusterInfo())]
15751 ilist = cfg.GetAllInstancesInfo().values()
15752 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15753 nlist = cfg.GetAllNodesInfo().values()
15754 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15755 tgts.extend(("/nodegroup/%s" % n.name, n)
15756 for n in cfg.GetAllNodeGroupsInfo().values())
15757 results = []
15758 for path, target in tgts:
15759 for tag in target.GetTags():
15760 if self.re.search(tag):
15761 results.append((path, tag))
15763 return results
15765 class LUTagsSet(TagsLU):
15766 """Sets a tag on a given object.
15771 def CheckPrereq(self):
15772 """Check prerequisites.
15774 This checks the type and length of the tag name and value.
15777 TagsLU.CheckPrereq(self)
15778 for tag in self.op.tags:
15779 objects.TaggableObject.ValidateTag(tag)
15781 def Exec(self, feedback_fn):
15782 """Sets the tag.
15784 """
15785 try:
15786 for tag in self.op.tags:
15787 self.target.AddTag(tag)
15788 except errors.TagError, err:
15789 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15790 self.cfg.Update(self.target, feedback_fn)
15793 class LUTagsDel(TagsLU):
15794 """Delete a list of tags from a given object.
15799 def CheckPrereq(self):
15800 """Check prerequisites.
15802 This checks that we have the given tag.
15805 TagsLU.CheckPrereq(self)
15806 for tag in self.op.tags:
15807 objects.TaggableObject.ValidateTag(tag)
15808 del_tags = frozenset(self.op.tags)
15809 cur_tags = self.target.GetTags()
15811 diff_tags = del_tags - cur_tags
15812 if diff_tags:
15813 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15814 raise errors.OpPrereqError("Tag(s) %s not found" %
15815 (utils.CommaJoin(diff_names), ),
15816 errors.ECODE_NOENT)
15818 def Exec(self, feedback_fn):
15819 """Remove the tag from the object.
15822 for tag in self.op.tags:
15823 self.target.RemoveTag(tag)
15824 self.cfg.Update(self.target, feedback_fn)
15827 class LUTestDelay(NoHooksLU):
15828 """Sleep for a specified amount of time.
15830 This LU sleeps on the master and/or nodes for a specified amount of
15831 time.
15836 def ExpandNames(self):
15837 """Expand names and set required locks.
15839 This expands the node list, if any.
15842 self.needed_locks = {}
15843 if self.op.on_nodes:
15844 # _GetWantedNodes can be used here, but is not always appropriate to use
15845 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15846 # more information.
15847 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15848 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15850 def _TestDelay(self):
15851 """Do the actual sleep.
15854 if self.op.on_master:
15855 if not utils.TestDelay(self.op.duration):
15856 raise errors.OpExecError("Error during master delay test")
15857 if self.op.on_nodes:
15858 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15859 for node, node_result in result.items():
15860 node_result.Raise("Failure during rpc call to node %s" % node)
15862 def Exec(self, feedback_fn):
15863 """Execute the test delay opcode, with the wanted repetitions.
15866 if self.op.repeat == 0:
15867 self._TestDelay()
15868 else:
15869 top_value = self.op.repeat - 1
15870 for i in range(self.op.repeat):
15871 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15872 self._TestDelay()
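# Note that repeat == 0 still performs a single delay, while repeat == n
# runs the delay n times, logging each iteration.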
15875 class LURestrictedCommand(NoHooksLU):
15876 """Logical unit for executing restricted commands.
15881 def ExpandNames(self):
15882 if self.op.nodes:
15883 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15885 self.needed_locks = {
15886 locking.LEVEL_NODE: self.op.nodes,
15888 self.share_locks = {
15889 locking.LEVEL_NODE: not self.op.use_locking,
15892 def CheckPrereq(self):
15893 """Check prerequisites.
15897 def Exec(self, feedback_fn):
15898 """Execute restricted command and return output.
15901 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15903 # Check if correct locks are held
15904 assert set(self.op.nodes).issubset(owned_nodes)
15906 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15908 result = []
15910 for node_name in self.op.nodes:
15911 nres = rpcres[node_name]
15912 if nres.fail_msg:
15913 msg = ("Command '%s' on node '%s' failed: %s" %
15914 (self.op.command, node_name, nres.fail_msg))
15915 result.append((False, msg))
15916 else:
15917 result.append((True, nres.payload))
15919 return result
15922 class LUTestJqueue(NoHooksLU):
15923 """Utility LU to test some aspects of the job queue.
15928 # Must be lower than default timeout for WaitForJobChange to see whether it
15929 # notices changed jobs
15930 _CLIENT_CONNECT_TIMEOUT = 20.0
15931 _CLIENT_CONFIRM_TIMEOUT = 60.0
15933 @classmethod
15934 def _NotifyUsingSocket(cls, cb, errcls):
15935 """Opens a Unix socket and waits for another program to connect.
15937 @type cb: callable
15938 @param cb: Callback to send socket name to client
15939 @type errcls: class
15940 @param errcls: Exception class to use for errors
15942 """
15943 # Using a temporary directory as there's no easy way to create temporary
15944 # sockets without writing a custom loop around tempfile.mktemp and
15945 # socket.socket
15946 tmpdir = tempfile.mkdtemp()
15947 try:
15948 tmpsock = utils.PathJoin(tmpdir, "sock")
15950 logging.debug("Creating temporary socket at %s", tmpsock)
15951 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15952 try:
15953 sock.bind(tmpsock)
15954 sock.listen(1)
15956 # Send details to client
15957 cb(tmpsock)
15959 # Wait for client to connect before continuing
15960 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15961 try:
15962 (conn, _) = sock.accept()
15963 except socket.error, err:
15964 raise errcls("Client didn't connect in time (%s)" % err)
15965 finally:
15966 sock.close()
15967 finally:
15968 # Remove as soon as client is connected
15969 shutil.rmtree(tmpdir)
15971 # Wait for client to close
15972 try:
15973 try:
15974 # pylint: disable=E1101
15975 # Instance of '_socketobject' has no ... member
15976 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15977 conn.recv(1)
15978 except socket.error, err:
15979 raise errcls("Client failed to confirm notification (%s)" % err)
15980 finally:
15981 conn.close()
15983 def _SendNotification(self, test, arg, sockname):
15984 """Sends a notification to the client.
15986 @type test: string
15987 @param test: Test name
15988 @param arg: Test argument (depends on test)
15989 @type sockname: string
15990 @param sockname: Socket path
15993 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15995 def _Notify(self, prereq, test, arg):
15996 """Notifies the client of a test.
15999 @param prereq: Whether this is a prereq-phase test
16001 @param test: Test name
16002 @param arg: Test argument (depends on test)
16004 """
16005 if prereq:
16006 errcls = errors.OpPrereqError
16007 else:
16008 errcls = errors.OpExecError
16010 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16011 test, arg),
16012 errcls)
16014 def CheckArguments(self):
16015 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16016 self.expandnames_calls = 0
16018 def ExpandNames(self):
16019 checkargs_calls = getattr(self, "checkargs_calls", 0)
16020 if checkargs_calls < 1:
16021 raise errors.ProgrammerError("CheckArguments was not called")
16023 self.expandnames_calls += 1
16025 if self.op.notify_waitlock:
16026 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16028 self.LogInfo("Expanding names")
16030 # Get lock on master node (just to get a lock, not for a particular reason)
16031 self.needed_locks = {
16032 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16035 def Exec(self, feedback_fn):
16036 if self.expandnames_calls < 1:
16037 raise errors.ProgrammerError("ExpandNames was not called")
16039 if self.op.notify_exec:
16040 self._Notify(False, constants.JQT_EXEC, None)
16042 self.LogInfo("Executing")
16044 if self.op.log_messages:
16045 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16046 for idx, msg in enumerate(self.op.log_messages):
16047 self.LogInfo("Sending log message %s", idx + 1)
16048 feedback_fn(constants.JQT_MSGPREFIX + msg)
16049 # Report how many test messages have been sent
16050 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16052 if self.op.fail:
16053 raise errors.OpExecError("Opcode failure was requested")
16055 return True
16058 class LUTestAllocator(NoHooksLU):
16059 """Run allocator tests.
16061 This LU runs the allocator tests
16064 def CheckPrereq(self):
16065 """Check prerequisites.
16067 This checks the opcode parameters depending on the direction and mode test.
16070 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16071 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16072 for attr in ["memory", "disks", "disk_template",
16073 "os", "tags", "nics", "vcpus"]:
16074 if not hasattr(self.op, attr):
16075 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16076 attr, errors.ECODE_INVAL)
16077 iname = self.cfg.ExpandInstanceName(self.op.name)
16078 if iname is not None:
16079 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16080 iname, errors.ECODE_EXISTS)
16081 if not isinstance(self.op.nics, list):
16082 raise errors.OpPrereqError("Invalid parameter 'nics'",
16083 errors.ECODE_INVAL)
16084 if not isinstance(self.op.disks, list):
16085 raise errors.OpPrereqError("Invalid parameter 'disks'",
16086 errors.ECODE_INVAL)
16087 for row in self.op.disks:
16088 if (not isinstance(row, dict) or
16089 constants.IDISK_SIZE not in row or
16090 not isinstance(row[constants.IDISK_SIZE], int) or
16091 constants.IDISK_MODE not in row or
16092 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16093 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16094 " parameter", errors.ECODE_INVAL)
16095 if self.op.hypervisor is None:
16096 self.op.hypervisor = self.cfg.GetHypervisorType()
16097 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16098 fname = _ExpandInstanceName(self.cfg, self.op.name)
16099 self.op.name = fname
16100 self.relocate_from = \
16101 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16102 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16103 constants.IALLOCATOR_MODE_NODE_EVAC):
16104 if not self.op.instances:
16105 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16106 self.op.instances = _GetWantedInstances(self, self.op.instances)
16107 else:
16108 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16109 self.op.mode, errors.ECODE_INVAL)
16111 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16112 if self.op.iallocator is None:
16113 raise errors.OpPrereqError("Missing allocator name",
16114 errors.ECODE_INVAL)
16115 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16116 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16117 self.op.direction, errors.ECODE_INVAL)
16119 def Exec(self, feedback_fn):
16120 """Run the allocator test.
16123 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16124 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16125 memory=self.op.memory,
16126 disks=self.op.disks,
16127 disk_template=self.op.disk_template,
16128 os=self.op.os,
16129 tags=self.op.tags,
16130 nics=self.op.nics,
16131 vcpus=self.op.vcpus,
16132 spindle_use=self.op.spindle_use,
16133 hypervisor=self.op.hypervisor)
16134 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16135 req = iallocator.IAReqRelocate(name=self.op.name,
16136 relocate_from=list(self.relocate_from))
16137 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16138 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16139 target_groups=self.op.target_groups)
16140 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16141 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16142 evac_mode=self.op.evac_mode)
16143 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16144 disk_template = self.op.disk_template
16145 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16146 memory=self.op.memory,
16147 disks=self.op.disks,
16148 disk_template=disk_template,
16149 os=self.op.os,
16150 tags=self.op.tags,
16151 nics=self.op.nics,
16152 vcpus=self.op.vcpus,
16153 spindle_use=self.op.spindle_use,
16154 hypervisor=self.op.hypervisor)
16155 for idx in range(self.op.count)]
16156 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16157 else:
16158 raise errors.ProgrammerError("Uncaught mode %s in"
16159 " LUTestAllocator.Exec", self.op.mode)
16161 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
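# Direction "in" only returns the allocator input for inspection; "out"
# actually invokes the allocator, with validation disabled since this is a
# test opcode.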
16162 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16163 result = ial.in_text
16164 else:
16165 ial.Run(self.op.iallocator, validate=False)
16166 result = ial.out_text
16168 return result
16170 class LUNetworkAdd(LogicalUnit):
16171 """Logical unit for creating networks.
16174 HPATH = "network-add"
16175 HTYPE = constants.HTYPE_NETWORK
16178 def BuildHooksNodes(self):
16179 """Build hooks nodes.
16182 mn = self.cfg.GetMasterNode()
16183 return ([mn], [mn])
16185 def CheckArguments(self):
16186 if self.op.mac_prefix:
16187 self.op.mac_prefix = \
16188 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16190 def ExpandNames(self):
16191 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16193 if self.op.conflicts_check:
16194 self.share_locks[locking.LEVEL_NODE] = 1
16195 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16196 self.needed_locks = {
16197 locking.LEVEL_NODE: locking.ALL_SET,
16198 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16199 }
16200 else:
16201 self.needed_locks = {}
16203 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
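# As with new node groups above, the UUID is generated early so that the
# lock for the not-yet-existing network can be acquired in ExpandNames.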
16205 def CheckPrereq(self):
16206 if self.op.network is None:
16207 raise errors.OpPrereqError("Network must be given",
16208 errors.ECODE_INVAL)
16210 uuid = self.cfg.LookupNetwork(self.op.network_name)
16212 if uuid:
16213 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16214 self.op.network_name), errors.ECODE_EXISTS)
16216 # Check tag validity
16217 for tag in self.op.tags:
16218 objects.TaggableObject.ValidateTag(tag)
16220 def BuildHooksEnv(self):
16221 """Build hooks env.
16223 """
16224 args = {
16225 "name": self.op.network_name,
16226 "subnet": self.op.network,
16227 "gateway": self.op.gateway,
16228 "network6": self.op.network6,
16229 "gateway6": self.op.gateway6,
16230 "mac_prefix": self.op.mac_prefix,
16231 "tags": self.op.tags,
16232 }
16233 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16235 def Exec(self, feedback_fn):
16236 """Add the ip pool to the cluster.
16239 nobj = objects.Network(name=self.op.network_name,
16240 network=self.op.network,
16241 gateway=self.op.gateway,
16242 network6=self.op.network6,
16243 gateway6=self.op.gateway6,
16244 mac_prefix=self.op.mac_prefix,
16245 uuid=self.network_uuid,
16246 family=constants.IP4_VERSION)
16247 # Initialize the associated address pool
16248 try:
16249 pool = network.AddressPool.InitializeNetwork(nobj)
16250 except errors.AddressPoolError, e:
16251 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16253 # Check if we need to reserve the nodes and the cluster master IP
16254 # These may not be allocated to any instances in routed mode, as
16255 # they wouldn't function anyway.
16256 if self.op.conflicts_check:
16257 for node in self.cfg.GetAllNodesInfo().values():
16258 for ip in [node.primary_ip, node.secondary_ip]:
16259 try:
16260 if pool.Contains(ip):
16261 pool.Reserve(ip)
16262 self.LogInfo("Reserved IP address of node '%s' (%s)",
16263 node.name, ip)
16264 except errors.AddressPoolError:
16265 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16266 node.name, ip)
16268 master_ip = self.cfg.GetClusterInfo().master_ip
16269 try:
16270 if pool.Contains(master_ip):
16271 pool.Reserve(master_ip)
16272 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16273 except errors.AddressPoolError:
16274 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16275 master_ip)
16277 if self.op.add_reserved_ips:
16278 for ip in self.op.add_reserved_ips:
16279 try:
16280 pool.Reserve(ip, external=True)
16281 except errors.AddressPoolError, e:
16282 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16284 if self.op.tags:
16285 for tag in self.op.tags:
16286 nobj.AddTag(tag)
16288 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16289 del self.remove_locks[locking.LEVEL_NETWORK]
16292 class LUNetworkRemove(LogicalUnit):
16293 HPATH = "network-remove"
16294 HTYPE = constants.HTYPE_NETWORK
16297 def ExpandNames(self):
16298 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16300 if not self.network_uuid:
16301 raise errors.OpPrereqError(("Network '%s' not found" %
16302 self.op.network_name), errors.ECODE_NOENT)
16304 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16305 self.needed_locks = {
16306 locking.LEVEL_NETWORK: [self.network_uuid],
16307 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16310 def CheckPrereq(self):
16311 """Check prerequisites.
16313 This checks that the given network name exists as a network and that it
16314 is not connected to any node group.
16318 # Verify that the network is not connected.
16319 node_groups = [group.name
16320 for group in self.cfg.GetAllNodeGroupsInfo().values()
16321 if self.network_uuid in group.networks]
16323 if node_groups:
16324 self.LogWarning("Network '%s' is connected to the following"
16325 " node groups: %s" %
16326 (self.op.network_name,
16327 utils.CommaJoin(utils.NiceSort(node_groups))))
16328 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16330 def BuildHooksEnv(self):
16331 """Build hooks env.
16333 """
16334 return {
16335 "NETWORK_NAME": self.op.network_name,
16336 }
16338 def BuildHooksNodes(self):
16339 """Build hooks nodes.
16342 mn = self.cfg.GetMasterNode()
16343 return ([mn], [mn])
16345 def Exec(self, feedback_fn):
16346 """Remove the network.
16348 """
16349 try:
16350 self.cfg.RemoveNetwork(self.network_uuid)
16351 except errors.ConfigurationError:
16352 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16353 (self.op.network_name, self.network_uuid))
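
# Usage sketch: removal is typically requested through an OpNetworkRemove
# opcode (the exact opcode fields are defined in the opcodes module, not
# here); CheckPrereq above guarantees the network is disconnected from all
# node groups before RemoveNetwork touches the configuration.
#
#   op = opcodes.OpNetworkRemove(network_name="example-net")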


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved IPs"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
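
# Illustrative sketch (hypothetical addresses): changing the gateway in Exec
# above first reserves the new address externally, then releases the old
# one, so both stay reserved until the swap is complete:
#
#   pool.Reserve("192.0.2.254", external=True)   # new gateway
#   pool.Release("192.0.2.1", external=True)     # previous gateway
#   net.gateway = "192.0.2.254"
#   cfg.Update(net, feedback_fn)                 # persist the change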


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
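
# Example of the dictionary returned by _GetStats() for a hypothetical pool
# (the keys are the ones built above; the "map" string is whatever
# AddressPool.GetMap() produces, shown here under the assumption that
# reserved slots are marked "X" and free ones "."):
#
#   {
#     "free_count": 11,
#     "reserved_count": 3,
#     "map": "XX..........X.",
#     "external_reservations": "192.0.2.1",
#   }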


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
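
# Usage sketch: the opcode fields consumed by CheckArguments above are
# names, output_fields and use_locking, so a query for all networks can be
# expressed as follows (field names beyond "name" are assumed to be defined
# in query.NETWORK_FIELDS):
#
#   op = opcodes.OpNetworkQuery(names=[], use_locking=False,
#                               output_fields=["name", "free_count"])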


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
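
# Illustrative sketch: the per-group mapping written by Exec above is just
# the validated NIC parameter dictionary, e.g. for bridged mode over a
# hypothetical bridge "br0":
#
#   group.networks[network_uuid] = {
#     constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#     constants.NIC_LINK: "br0",
#     }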


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found; remove or"
                               " modify the corresponding network interfaces",
                               errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
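
# Example: for details = [(0, "192.0.2.10"), (2, "192.0.2.12")] the helper
# returns "nic0/192.0.2.10, nic2/192.0.2.12", which _NetworkConflictCheck
# then prefixes with the instance name in its warning message.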


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
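
# Note the asymmetry between the two conflict checks: connecting uses
# pool.Contains(nic.ip) to catch any NIC whose address would collide with
# the new pool, while disconnecting only flags NICs explicitly attached to
# this network (nic.network == network_name), since those would be left
# referencing a no-longer-connected network.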


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
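
# Example: _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above, while an unknown resource name is
# reported as OpPrereqError/ECODE_INVAL instead of leaking a KeyError to
# the caller.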


def _CheckForConflictingIp(lu, ip, node):
  """In case of a conflicting IP address, raise an error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("IP address '%s' conflicts with network '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
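
# Usage sketch (assumed caller pattern): NIC validation code passes a
# candidate address and the node whose group determines which networks
# apply; on success the helper mirrors the "no match" result shape of
# cfg.CheckIPInNodeGroup() by returning (None, None).
#
#   (net, _) = _CheckForConflictingIp(self, "192.0.2.10", node_name)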