code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141
 142     # Dictionaries used to declare locking needs to mcpu
 143     self.needed_locks = None
 144     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 145     self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
 146
 147     self.add_locks = {}
 148     self.remove_locks = {}
 149
 150     # Used to force good behavior when calling helper functions
 151     self.recalculate_locks = {}
 152
 153     # logging
 154     self.Log = processor.Log # pylint: disable=C0103
 155     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 156     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 157     self.LogStep = processor.LogStep # pylint: disable=C0103
 158     # support for dry-run
 159     self.dry_run_result = None
 160     # support for generic debug attribute
 161     if (not hasattr(self.op, "debug_level") or
 162         not isinstance(self.op.debug_level, int)):
 163       self.op.debug_level = 0
 164
 165     # Tasklets
 166     self.tasklets = None
 167
 168     # Validate opcode parameters and set defaults
 169     self.op.Validate(True)
 170
 171     self.CheckArguments()
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that
 205         level (note that in this case C{DeclareLocks} won't be called
 206         at all for that level)
 207       - if you need locks at a level, but you can't calculate it in
 208         this function, initialise that level with an empty list and do
 209         further processing in L{LogicalUnit.DeclareLocks} (see that
 210         function's docstring)
 211       - don't put anything for the BGL level
 212       - if you want all locks at a level use L{locking.ALL_SET} as a value
 213
 214     If you need to share locks (rather than acquire them exclusively) at one
 215     level you can modify self.share_locks, setting a true value (usually 1) for
 216     that level. By default locks are not shared.
 217
 218     This function can also define a list of tasklets, which then will be
 219     executed in order instead of the usual LU-level CheckPrereq and Exec
 220     functions, if those are not defined by the LU.
 221
 222     Examples::
 223
 224       # Acquire all nodes and one instance
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: locking.ALL_SET,
 227         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 228       }
 229       # Acquire just two nodes
 230       self.needed_locks = {
 231         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 232       }
 233       # Acquire no locks
 234       self.needed_locks = {} # No, you can't leave it to the default value None
 235
 236     """
 237     # The implementation of this method is mandatory only if the new LU is
 238     # concurrent, so that old LUs don't need to be changed all at the same
 239     # time.
 240     if self.REQ_BGL:
 241       self.needed_locks = {} # Exclusive LUs don't need locks.
 242     else:
 243       raise NotImplementedError
 244
 245   def DeclareLocks(self, level):
 246     """Declare LU locking needs for a level
 247
 248     While most LUs can just declare their locking needs at ExpandNames time,
 249     sometimes there's the need to calculate some locks after having acquired
 250     the ones before. This function is called just before acquiring locks at a
 251     particular level, but after acquiring the ones at lower levels, and permits
 252     such calculations. It can be used to modify self.needed_locks, and by
 253     default it does nothing.
 254
 255     This function is only called if you have something already set in
 256     self.needed_locks for the level.
 257
 258     @param level: Locking level which is going to be locked
 259     @type level: member of L{ganeti.locking.LEVELS}
 260
 261     """
 262
 263   def CheckPrereq(self):
 264     """Check prerequisites for this LU.
 265
 266     This method should check that the prerequisites for the execution
 267     of this LU are fulfilled. It can do internode communication, but
 268     it should be idempotent - no cluster or system changes are
 269     allowed.
 270
 271     The method should raise errors.OpPrereqError in case something is
 272     not fulfilled. Its return value is ignored.
 273
 274     This method should also update all the parameters of the opcode to
 275     their canonical form if it hasn't been done by ExpandNames before.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Checking prerequisites for tasklet %s/%s",
 281                       idx + 1, len(self.tasklets))
 282         tl.CheckPrereq()
 283     else:
 284       pass
 285
 286   def Exec(self, feedback_fn):
 287     """Execute the LU.
 288
 289     This method should implement the actual work. It should raise
 290     errors.OpExecError for failures that are somewhat dealt with in
 291     code, or expected.
 292
 293     """
 294     if self.tasklets is not None:
 295       for (idx, tl) in enumerate(self.tasklets):
 296         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 297         tl.Exec(feedback_fn)
 298     else:
 299       raise NotImplementedError
 300
 301   def BuildHooksEnv(self):
 302     """Build hooks environment for this LU.
 303
 304     @rtype: dict
 305     @return: Dictionary containing the environment that will be used for
 306       running the hooks for this LU. The keys of the dict must not be prefixed
 307       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 308       will extend the environment with additional variables. If no environment
 309       should be defined, an empty dictionary should be returned (not C{None}).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def BuildHooksNodes(self):
 317     """Build list of nodes to run LU's hooks.
 318
 319     @rtype: tuple; (list, list)
 320     @return: Tuple containing a list of node names on which the hook
 321       should run before the execution and a list of node names on which the
 322       hook should run after the execution. No nodes should be returned as an
 323       empty list (and not None).
 324     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 325       will not be called.
 326
 327     """
 328     raise NotImplementedError
 329
 330   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 331     """Notify the LU about the results of its hooks.
 332
 333     This method is called every time a hooks phase is executed, and notifies
 334     the Logical Unit about the hooks' result. The LU can then use it to alter
 335     its result based on the hooks.  By default the method does nothing and the
 336     previous result is passed back unchanged but any LU can define it if it
 337     wants to use the local cluster hook-scripts somehow.
 338
 339     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 340         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 341     @param hook_results: the results of the multi-node hooks rpc call
 342     @param feedback_fn: function used send feedback back to the caller
 343     @param lu_result: the previous Exec result this LU had, or None
 344         in the PRE phase
 345     @return: the new Exec result, based on the previous result
 346         and hook results
 347
 348     """
 349     # API must be kept, thus we ignore the unused argument and could
 350     # be a function warnings
 351     # pylint: disable=W0613,R0201
 352     return lu_result
 353
 354   def _ExpandAndLockInstance(self):
 355     """Helper function to expand and lock an instance.
 356
 357     Many LUs that work on an instance take its name in self.op.instance_name
 358     and need to expand it and then declare the expanded name for locking. This
 359     function does it, and then updates self.op.instance_name to the expanded
 360     name. It also initializes needed_locks as a dict, if this hasn't been done
 361     before.
 362
 363     """
 364     if self.needed_locks is None:
 365       self.needed_locks = {}
 366     else:
 367       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 368         "_ExpandAndLockInstance called with instance-level locks set"
 369     self.op.instance_name = _ExpandInstanceName(self.cfg,
 370                                                 self.op.instance_name)
 371     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 372
 373   def _LockInstancesNodes(self, primary_only=False,
 374                           level=locking.LEVEL_NODE):
 375     """Helper function to declare instances' nodes for locking.
 376
 377     This function should be called after locking one or more instances to lock
 378     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 379     with all primary or secondary nodes for instances already locked and
 380     present in self.needed_locks[locking.LEVEL_INSTANCE].
 381
 382     It should be called from DeclareLocks, and for safety only works if
 383     self.recalculate_locks[locking.LEVEL_NODE] is set.
 384
 385     In the future it may grow parameters to just lock some instance's nodes, or
 386     to just lock primaries or secondary nodes, if needed.
 387
 388     If should be called in DeclareLocks in a way similar to::
 389
 390       if level == locking.LEVEL_NODE:
 391         self._LockInstancesNodes()
 392
 393     @type primary_only: boolean
 394     @param primary_only: only lock primary nodes of locked instances
 395     @param level: Which lock level to use for locking nodes
 396
 397     """
 398     assert level in self.recalculate_locks, \
 399       "_LockInstancesNodes helper function called with no nodes to recalculate"
 400
 401     # TODO: check if we're really been called with the instance locks held
 402
 403     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 404     # future we might want to have different behaviors depending on the value
 405     # of self.recalculate_locks[locking.LEVEL_NODE]
 406     wanted_nodes = []
 407     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 408     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 409       wanted_nodes.append(instance.primary_node)
 410       if not primary_only:
 411         wanted_nodes.extend(instance.secondary_nodes)
 412
 413     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 414       self.needed_locks[level] = wanted_nodes
 415     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 416       self.needed_locks[level].extend(wanted_nodes)
 417     else:
 418       raise errors.ProgrammerError("Unknown recalculation mode")
 419
 420     del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _IsExclusiveStorageEnabledNode(cfg, node):
 701   """Whether exclusive_storage is in effect for the given node.
 702
 703   @type cfg: L{config.ConfigWriter}
 704   @param cfg: The cluster configuration
 705   @type node: L{objects.Node}
 706   @param node: The node
 707   @rtype: bool
 708   @return: The effective value of exclusive_storage
 709
 710   """
 711   return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
 712
 713
 714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
 715   """Whether exclusive_storage is in effect for the given node.
 716
 717   @type cfg: L{config.ConfigWriter}
 718   @param cfg: The cluster configuration
 719   @type nodename: string
 720   @param nodename: The node
 721   @rtype: bool
 722   @return: The effective value of exclusive_storage
 723   @raise errors.OpPrereqError: if no node exists with the given name
 724
 725   """
 726   ni = cfg.GetNodeInfo(nodename)
 727   if ni is None:
 728     raise errors.OpPrereqError("Invalid node name %s" % nodename,
 729                                errors.ECODE_NOENT)
 730   return _IsExclusiveStorageEnabledNode(cfg, ni)
 731
 732
 733 def _CopyLockList(names):
 734   """Makes a copy of a list of lock names.
 735
 736   Handles L{locking.ALL_SET} correctly.
 737
 738   """
 739   if names == locking.ALL_SET:
 740     return locking.ALL_SET
 741   else:
 742     return names[:]
 743
 744
 745 def _GetWantedNodes(lu, nodes):
 746   """Returns list of checked and expanded node names.
 747
 748   @type lu: L{LogicalUnit}
 749   @param lu: the logical unit on whose behalf we execute
 750   @type nodes: list
 751   @param nodes: list of node names or None for all nodes
 752   @rtype: list
 753   @return: the list of nodes, sorted
 754   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 755
 756   """
 757   if nodes:
 758     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 759
 760   return utils.NiceSort(lu.cfg.GetNodeList())
 761
 762
 763 def _GetWantedInstances(lu, instances):
 764   """Returns list of checked and expanded instance names.
 765
 766   @type lu: L{LogicalUnit}
 767   @param lu: the logical unit on whose behalf we execute
 768   @type instances: list
 769   @param instances: list of instance names or None for all instances
 770   @rtype: list
 771   @return: the list of instances, sorted
 772   @raise errors.OpPrereqError: if the instances parameter is wrong type
 773   @raise errors.OpPrereqError: if any of the passed instances is not found
 774
 775   """
 776   if instances:
 777     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 778   else:
 779     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 780   return wanted
 781
 782
 783 def _GetUpdatedParams(old_params, update_dict,
 784                       use_default=True, use_none=False):
 785   """Return the new version of a parameter dictionary.
 786
 787   @type old_params: dict
 788   @param old_params: old parameters
 789   @type update_dict: dict
 790   @param update_dict: dict containing new parameter values, or
 791       constants.VALUE_DEFAULT to reset the parameter to its default
 792       value
 793   @param use_default: boolean
 794   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 795       values as 'to be deleted' values
 796   @param use_none: boolean
 797   @type use_none: whether to recognise C{None} values as 'to be
 798       deleted' values
 799   @rtype: dict
 800   @return: the new parameter dictionary
 801
 802   """
 803   params_copy = copy.deepcopy(old_params)
 804   for key, val in update_dict.iteritems():
 805     if ((use_default and val == constants.VALUE_DEFAULT) or
 806         (use_none and val is None)):
 807       try:
 808         del params_copy[key]
 809       except KeyError:
 810         pass
 811     else:
 812       params_copy[key] = val
 813   return params_copy
 814
 815
 816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 817   """Return the new version of a instance policy.
 818
 819   @param group_policy: whether this policy applies to a group and thus
 820     we should support removal of policy entries
 821
 822   """
 823   use_none = use_default = group_policy
 824   ipolicy = copy.deepcopy(old_ipolicy)
 825   for key, value in new_ipolicy.items():
 826     if key not in constants.IPOLICY_ALL_KEYS:
 827       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 828                                  errors.ECODE_INVAL)
 829     if key in constants.IPOLICY_ISPECS:
 830       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 831       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 832                                        use_none=use_none,
 833                                        use_default=use_default)
 834     else:
 835       if (not value or value == [constants.VALUE_DEFAULT] or
 836           value == constants.VALUE_DEFAULT):
 837         if group_policy:
 838           del ipolicy[key]
 839         else:
 840           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 841                                      " on the cluster'" % key,
 842                                      errors.ECODE_INVAL)
 843       else:
 844         if key in constants.IPOLICY_PARAMETERS:
 845           # FIXME: we assume all such values are float
 846           try:
 847             ipolicy[key] = float(value)
 848           except (TypeError, ValueError), err:
 849             raise errors.OpPrereqError("Invalid value for attribute"
 850                                        " '%s': '%s', error: %s" %
 851                                        (key, value, err), errors.ECODE_INVAL)
 852         else:
 853           # FIXME: we assume all others are lists; this should be redone
 854           # in a nicer way
 855           ipolicy[key] = list(value)
 856   try:
 857     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 858   except errors.ConfigurationError, err:
 859     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 860                                errors.ECODE_INVAL)
 861   return ipolicy
 862
 863
 864 def _UpdateAndVerifySubDict(base, updates, type_check):
 865   """Updates and verifies a dict with sub dicts of the same type.
 866
 867   @param base: The dict with the old data
 868   @param updates: The dict with the new data
 869   @param type_check: Dict suitable to ForceDictType to verify correct types
 870   @returns: A new dict with updated and verified values
 871
 872   """
 873   def fn(old, value):
 874     new = _GetUpdatedParams(old, value)
 875     utils.ForceDictType(new, type_check)
 876     return new
 877
 878   ret = copy.deepcopy(base)
 879   ret.update(dict((key, fn(base.get(key, {}), value))
 880                   for key, value in updates.items()))
 881   return ret
 882
 883
 884 def _MergeAndVerifyHvState(op_input, obj_input):
 885   """Combines the hv state from an opcode with the one of the object
 886
 887   @param op_input: The input dict from the opcode
 888   @param obj_input: The input dict from the objects
 889   @return: The verified and updated dict
 890
 891   """
 892   if op_input:
 893     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 894     if invalid_hvs:
 895       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 896                                  " %s" % utils.CommaJoin(invalid_hvs),
 897                                  errors.ECODE_INVAL)
 898     if obj_input is None:
 899       obj_input = {}
 900     type_check = constants.HVSTS_PARAMETER_TYPES
 901     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 902
 903   return None
 904
 905
 906 def _MergeAndVerifyDiskState(op_input, obj_input):
 907   """Combines the disk state from an opcode with the one of the object
 908
 909   @param op_input: The input dict from the opcode
 910   @param obj_input: The input dict from the objects
 911   @return: The verified and updated dict
 912   """
 913   if op_input:
 914     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 915     if invalid_dst:
 916       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 917                                  utils.CommaJoin(invalid_dst),
 918                                  errors.ECODE_INVAL)
 919     type_check = constants.DSS_PARAMETER_TYPES
 920     if obj_input is None:
 921       obj_input = {}
 922     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 923                                               type_check))
 924                 for key, value in op_input.items())
 925
 926   return None
 927
 928
 929 def _ReleaseLocks(lu, level, names=None, keep=None):
 930   """Releases locks owned by an LU.
 931
 932   @type lu: L{LogicalUnit}
 933   @param level: Lock level
 934   @type names: list or None
 935   @param names: Names of locks to release
 936   @type keep: list or None
 937   @param keep: Names of locks to retain
 938
 939   """
 940   assert not (keep is not None and names is not None), \
 941          "Only one of the 'names' and the 'keep' parameters can be given"
 942
 943   if names is not None:
 944     should_release = names.__contains__
 945   elif keep:
 946     should_release = lambda name: name not in keep
 947   else:
 948     should_release = None
 949
 950   owned = lu.owned_locks(level)
 951   if not owned:
 952     # Not owning any lock at this level, do nothing
 953     pass
 954
 955   elif should_release:
 956     retain = []
 957     release = []
 958
 959     # Determine which locks to release
 960     for name in owned:
 961       if should_release(name):
 962         release.append(name)
 963       else:
 964         retain.append(name)
 965
 966     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 967
 968     # Release just some locks
 969     lu.glm.release(level, names=release)
 970
 971     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 972   else:
 973     # Release everything
 974     lu.glm.release(level)
 975
 976     assert not lu.glm.is_owned(level), "No locks should be owned"
 977
 978
 979 def _MapInstanceDisksToNodes(instances):
 980   """Creates a map from (node, volume) to instance name.
 981
 982   @type instances: list of L{objects.Instance}
 983   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 984
 985   """
 986   return dict(((node, vol), inst.name)
 987               for inst in instances
 988               for (node, vols) in inst.MapLVsByNode().items()
 989               for vol in vols)
 990
 991
 992 def _RunPostHook(lu, node_name):
 993   """Runs the post-hook for an opcode on a single node.
 994
 995   """
 996   hm = lu.proc.BuildHooksManager(lu)
 997   try:
 998     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 999   except Exception, err: # pylint: disable=W0703
1000     lu.LogWarning("Errors occurred running hooks on %s: %s",
1001                   node_name, err)
1002
1003
1004 def _CheckOutputFields(static, dynamic, selected):
1005   """Checks whether all selected fields are valid.
1006
1007   @type static: L{utils.FieldSet}
1008   @param static: static fields set
1009   @type dynamic: L{utils.FieldSet}
1010   @param dynamic: dynamic fields set
1011
1012   """
1013   f = utils.FieldSet()
1014   f.Extend(static)
1015   f.Extend(dynamic)
1016
1017   delta = f.NonMatching(selected)
1018   if delta:
1019     raise errors.OpPrereqError("Unknown output fields selected: %s"
1020                                % ",".join(delta), errors.ECODE_INVAL)
1021
1022
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024   """Make sure that none of the given paramters is global.
1025
1026   If a global parameter is found, an L{errors.OpPrereqError} exception is
1027   raised. This is used to avoid setting global parameters for individual nodes.
1028
1029   @type params: dictionary
1030   @param params: Parameters to check
1031   @type glob_pars: dictionary
1032   @param glob_pars: Forbidden parameters
1033   @type kind: string
1034   @param kind: Kind of parameters (e.g. "node")
1035   @type bad_levels: string
1036   @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1037       "instance")
1038   @type good_levels: strings
1039   @param good_levels: Level(s) at which the parameters are allowed (e.g.
1040       "cluster or group")
1041
1042   """
1043   used_globals = glob_pars.intersection(params)
1044   if used_globals:
1045     msg = ("The following %s parameters are global and cannot"
1046            " be customized at %s level, please modify them at"
1047            " %s level: %s" %
1048            (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1050
1051
1052 def _CheckNodeOnline(lu, node, msg=None):
1053   """Ensure that a given node is online.
1054
1055   @param lu: the LU on behalf of which we make the check
1056   @param node: the node to check
1057   @param msg: if passed, should be a message to replace the default one
1058   @raise errors.OpPrereqError: if the node is offline
1059
1060   """
1061   if msg is None:
1062     msg = "Can't use offline node"
1063   if lu.cfg.GetNodeInfo(node).offline:
1064     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1065
1066
1067 def _CheckNodeNotDrained(lu, node):
1068   """Ensure that a given node is not drained.
1069
1070   @param lu: the LU on behalf of which we make the check
1071   @param node: the node to check
1072   @raise errors.OpPrereqError: if the node is drained
1073
1074   """
1075   if lu.cfg.GetNodeInfo(node).drained:
1076     raise errors.OpPrereqError("Can't use drained node %s" % node,
1077                                errors.ECODE_STATE)
1078
1079
1080 def _CheckNodeVmCapable(lu, node):
1081   """Ensure that a given node is vm capable.
1082
1083   @param lu: the LU on behalf of which we make the check
1084   @param node: the node to check
1085   @raise errors.OpPrereqError: if the node is not vm capable
1086
1087   """
1088   if not lu.cfg.GetNodeInfo(node).vm_capable:
1089     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1090                                errors.ECODE_STATE)
1091
1092
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094   """Ensure that a node supports a given OS.
1095
1096   @param lu: the LU on behalf of which we make the check
1097   @param node: the node to check
1098   @param os_name: the OS to query about
1099   @param force_variant: whether to ignore variant errors
1100   @raise errors.OpPrereqError: if the node is not supporting the OS
1101
1102   """
1103   result = lu.rpc.call_os_get(node, os_name)
1104   result.Raise("OS '%s' not in supported OS list for node %s" %
1105                (os_name, node),
1106                prereq=True, ecode=errors.ECODE_INVAL)
1107   if not force_variant:
1108     _CheckOSVariant(result.payload, os_name)
1109
1110
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112   """Ensure that a node has the given secondary ip.
1113
1114   @type lu: L{LogicalUnit}
1115   @param lu: the LU on behalf of which we make the check
1116   @type node: string
1117   @param node: the node to check
1118   @type secondary_ip: string
1119   @param secondary_ip: the ip to check
1120   @type prereq: boolean
1121   @param prereq: whether to throw a prerequisite or an execute error
1122   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1124
1125   """
1126   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127   result.Raise("Failure checking secondary ip on node %s" % node,
1128                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129   if not result.payload:
1130     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131            " please fix and re-run this command" % secondary_ip)
1132     if prereq:
1133       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1134     else:
1135       raise errors.OpExecError(msg)
1136
1137
1138 def _CheckNodePVs(nresult, exclusive_storage):
1139   """Check node PVs.
1140
1141   """
1142   pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143   if pvlist_dict is None:
1144     return (["Can't get PV list from node"], None)
1145   pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1146   errlist = []
1147   # check that ':' is not present in PV names, since it's a
1148   # special character for lvcreate (denotes the range of PEs to
1149   # use on the PV)
1150   for pv in pvlist:
1151     if ":" in pv.name:
1152       errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153                      (pv.name, pv.vg_name))
1154   es_pvinfo = None
1155   if exclusive_storage:
1156     (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157     errlist.extend(errmsgs)
1158     shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1159     if shared_pvs:
1160       for (pvname, lvlist) in shared_pvs:
1161         # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162         errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163                        (pvname, utils.CommaJoin(lvlist)))
1164   return (errlist, es_pvinfo)
1165
1166
1167 def _GetClusterDomainSecret():
1168   """Reads the cluster domain secret.
1169
1170   """
1171   return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1172                                strict=True)
1173
1174
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176   """Ensure that an instance is in one of the required states.
1177
1178   @param lu: the LU on behalf of which we make the check
1179   @param instance: the instance to check
1180   @param msg: if passed, should be a message to replace the default one
1181   @raise errors.OpPrereqError: if the instance is not in the required state
1182
1183   """
1184   if msg is None:
1185     msg = ("can't use instance from outside %s states" %
1186            utils.CommaJoin(req_states))
1187   if instance.admin_state not in req_states:
1188     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189                                (instance.name, instance.admin_state, msg),
1190                                errors.ECODE_STATE)
1191
1192   if constants.ADMINST_UP not in req_states:
1193     pnode = instance.primary_node
1194     if not lu.cfg.GetNodeInfo(pnode).offline:
1195       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197                   prereq=True, ecode=errors.ECODE_ENVIRON)
1198       if instance.name in ins_l.payload:
1199         raise errors.OpPrereqError("Instance %s is running, %s" %
1200                                    (instance.name, msg), errors.ECODE_STATE)
1201     else:
1202       lu.LogWarning("Primary node offline, ignoring check that instance"
1203                      " is down")
1204
1205
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207   """Computes if value is in the desired range.
1208
1209   @param name: name of the parameter for which we perform the check
1210   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1211       not just 'disk')
1212   @param ipolicy: dictionary containing min, max and std values
1213   @param value: actual value that we want to use
1214   @return: None or element not meeting the criteria
1215
1216
1217   """
1218   if value in [None, constants.VALUE_AUTO]:
1219     return None
1220   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222   if value > max_v or min_v > value:
1223     if qualifier:
1224       fqn = "%s/%s" % (name, qualifier)
1225     else:
1226       fqn = name
1227     return ("%s value %s is not in range [%s, %s]" %
1228             (fqn, value, min_v, max_v))
1229   return None
1230
1231
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233                                  nic_count, disk_sizes, spindle_use,
1234                                  _compute_fn=_ComputeMinMaxSpec):
1235   """Verifies ipolicy against provided specs.
1236
1237   @type ipolicy: dict
1238   @param ipolicy: The ipolicy
1239   @type mem_size: int
1240   @param mem_size: The memory size
1241   @type cpu_count: int
1242   @param cpu_count: Used cpu cores
1243   @type disk_count: int
1244   @param disk_count: Number of disks used
1245   @type nic_count: int
1246   @param nic_count: Number of nics used
1247   @type disk_sizes: list of ints
1248   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1249   @type spindle_use: int
1250   @param spindle_use: The number of spindles this instance uses
1251   @param _compute_fn: The compute function (unittest only)
1252   @return: A list of violations, or an empty list of no violations are found
1253
1254   """
1255   assert disk_count == len(disk_sizes)
1256
1257   test_settings = [
1258     (constants.ISPEC_MEM_SIZE, "", mem_size),
1259     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1260     (constants.ISPEC_DISK_COUNT, "", disk_count),
1261     (constants.ISPEC_NIC_COUNT, "", nic_count),
1262     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1263     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1264          for idx, d in enumerate(disk_sizes)]
1265
1266   return filter(None,
1267                 (_compute_fn(name, qualifier, ipolicy, value)
1268                  for (name, qualifier, value) in test_settings))
1269
1270
1271 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1272                                      _compute_fn=_ComputeIPolicySpecViolation):
1273   """Compute if instance meets the specs of ipolicy.
1274
1275   @type ipolicy: dict
1276   @param ipolicy: The ipolicy to verify against
1277   @type instance: L{objects.Instance}
1278   @param instance: The instance to verify
1279   @param _compute_fn: The function to verify ipolicy (unittest only)
1280   @see: L{_ComputeIPolicySpecViolation}
1281
1282   """
1283   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1284   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1285   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1286   disk_count = len(instance.disks)
1287   disk_sizes = [disk.size for disk in instance.disks]
1288   nic_count = len(instance.nics)
1289
1290   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1291                      disk_sizes, spindle_use)
1292
1293
1294 def _ComputeIPolicyInstanceSpecViolation(
1295   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1296   """Compute if instance specs meets the specs of ipolicy.
1297
1298   @type ipolicy: dict
1299   @param ipolicy: The ipolicy to verify against
1300   @param instance_spec: dict
1301   @param instance_spec: The instance spec to verify
1302   @param _compute_fn: The function to verify ipolicy (unittest only)
1303   @see: L{_ComputeIPolicySpecViolation}
1304
1305   """
1306   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1307   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1308   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1309   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1310   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1311   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1312
1313   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1314                      disk_sizes, spindle_use)
1315
1316
1317 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1318                                  target_group,
1319                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1320   """Compute if instance meets the specs of the new target group.
1321
1322   @param ipolicy: The ipolicy to verify
1323   @param instance: The instance object to verify
1324   @param current_group: The current group of the instance
1325   @param target_group: The new group of the instance
1326   @param _compute_fn: The function to verify ipolicy (unittest only)
1327   @see: L{_ComputeIPolicySpecViolation}
1328
1329   """
1330   if current_group == target_group:
1331     return []
1332   else:
1333     return _compute_fn(ipolicy, instance)
1334
1335
1336 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1337                             _compute_fn=_ComputeIPolicyNodeViolation):
1338   """Checks that the target node is correct in terms of instance policy.
1339
1340   @param ipolicy: The ipolicy to verify
1341   @param instance: The instance object to verify
1342   @param node: The new node to relocate
1343   @param ignore: Ignore violations of the ipolicy
1344   @param _compute_fn: The function to verify ipolicy (unittest only)
1345   @see: L{_ComputeIPolicySpecViolation}
1346
1347   """
1348   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1349   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1350
1351   if res:
1352     msg = ("Instance does not meet target node group's (%s) instance"
1353            " policy: %s") % (node.group, utils.CommaJoin(res))
1354     if ignore:
1355       lu.LogWarning(msg)
1356     else:
1357       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1358
1359
1360 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1361   """Computes a set of any instances that would violate the new ipolicy.
1362
1363   @param old_ipolicy: The current (still in-place) ipolicy
1364   @param new_ipolicy: The new (to become) ipolicy
1365   @param instances: List of instances to verify
1366   @return: A list of instances which violates the new ipolicy but
1367       did not before
1368
1369   """
1370   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1371           _ComputeViolatingInstances(old_ipolicy, instances))
1372
1373
1374 def _ExpandItemName(fn, name, kind):
1375   """Expand an item name.
1376
1377   @param fn: the function to use for expansion
1378   @param name: requested item name
1379   @param kind: text description ('Node' or 'Instance')
1380   @return: the resolved (full) name
1381   @raise errors.OpPrereqError: if the item is not found
1382
1383   """
1384   full_name = fn(name)
1385   if full_name is None:
1386     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1387                                errors.ECODE_NOENT)
1388   return full_name
1389
1390
1391 def _ExpandNodeName(cfg, name):
1392   """Wrapper over L{_ExpandItemName} for nodes."""
1393   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1394
1395
1396 def _ExpandInstanceName(cfg, name):
1397   """Wrapper over L{_ExpandItemName} for instance."""
1398   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1399
1400
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1402                          mac_prefix, tags):
1403   """Builds network related env variables for hooks
1404
1405   This builds the hook environment from individual variables.
1406
1407   @type name: string
1408   @param name: the name of the network
1409   @type subnet: string
1410   @param subnet: the ipv4 subnet
1411   @type gateway: string
1412   @param gateway: the ipv4 gateway
1413   @type network6: string
1414   @param network6: the ipv6 subnet
1415   @type gateway6: string
1416   @param gateway6: the ipv6 gateway
1417   @type mac_prefix: string
1418   @param mac_prefix: the mac_prefix
1419   @type tags: list
1420   @param tags: the tags of the network
1421
1422   """
1423   env = {}
1424   if name:
1425     env["NETWORK_NAME"] = name
1426   if subnet:
1427     env["NETWORK_SUBNET"] = subnet
1428   if gateway:
1429     env["NETWORK_GATEWAY"] = gateway
1430   if network6:
1431     env["NETWORK_SUBNET6"] = network6
1432   if gateway6:
1433     env["NETWORK_GATEWAY6"] = gateway6
1434   if mac_prefix:
1435     env["NETWORK_MAC_PREFIX"] = mac_prefix
1436   if tags:
1437     env["NETWORK_TAGS"] = " ".join(tags)
1438
1439   return env
1440
1441
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443                           minmem, maxmem, vcpus, nics, disk_template, disks,
1444                           bep, hvp, hypervisor_name, tags):
1445   """Builds instance related env variables for hooks
1446
1447   This builds the hook environment from individual variables.
1448
1449   @type name: string
1450   @param name: the name of the instance
1451   @type primary_node: string
1452   @param primary_node: the name of the instance's primary node
1453   @type secondary_nodes: list
1454   @param secondary_nodes: list of secondary nodes as strings
1455   @type os_type: string
1456   @param os_type: the name of the instance's OS
1457   @type status: string
1458   @param status: the desired status of the instance
1459   @type minmem: string
1460   @param minmem: the minimum memory size of the instance
1461   @type maxmem: string
1462   @param maxmem: the maximum memory size of the instance
1463   @type vcpus: string
1464   @param vcpus: the count of VCPUs the instance has
1465   @type nics: list
1466   @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467       the NICs the instance has
1468   @type disk_template: string
1469   @param disk_template: the disk template of the instance
1470   @type disks: list
1471   @param disks: the list of (size, mode) pairs
1472   @type bep: dict
1473   @param bep: the backend parameters for the instance
1474   @type hvp: dict
1475   @param hvp: the hypervisor parameters for the instance
1476   @type hypervisor_name: string
1477   @param hypervisor_name: the hypervisor for the instance
1478   @type tags: list
1479   @param tags: list of instance tags as strings
1480   @rtype: dict
1481   @return: the hook environment for this instance
1482
1483   """
1484   env = {
1485     "OP_TARGET": name,
1486     "INSTANCE_NAME": name,
1487     "INSTANCE_PRIMARY": primary_node,
1488     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489     "INSTANCE_OS_TYPE": os_type,
1490     "INSTANCE_STATUS": status,
1491     "INSTANCE_MINMEM": minmem,
1492     "INSTANCE_MAXMEM": maxmem,
1493     # TODO(2.7) remove deprecated "memory" value
1494     "INSTANCE_MEMORY": maxmem,
1495     "INSTANCE_VCPUS": vcpus,
1496     "INSTANCE_DISK_TEMPLATE": disk_template,
1497     "INSTANCE_HYPERVISOR": hypervisor_name,
1498   }
1499   if nics:
1500     nic_count = len(nics)
1501     for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1502       if ip is None:
1503         ip = ""
1504       env["INSTANCE_NIC%d_IP" % idx] = ip
1505       env["INSTANCE_NIC%d_MAC" % idx] = mac
1506       env["INSTANCE_NIC%d_MODE" % idx] = mode
1507       env["INSTANCE_NIC%d_LINK" % idx] = link
1508       if netinfo:
1509         nobj = objects.Network.FromDict(netinfo)
1510         env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1511       elif network:
1512         # FIXME: broken network reference: the instance NIC specifies a
1513         # network, but the relevant network entry was not in the config. This
1514         # should be made impossible.
1515         env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516       if mode == constants.NIC_MODE_BRIDGED:
1517         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1518   else:
1519     nic_count = 0
1520
1521   env["INSTANCE_NIC_COUNT"] = nic_count
1522
1523   if disks:
1524     disk_count = len(disks)
1525     for idx, (size, mode) in enumerate(disks):
1526       env["INSTANCE_DISK%d_SIZE" % idx] = size
1527       env["INSTANCE_DISK%d_MODE" % idx] = mode
1528   else:
1529     disk_count = 0
1530
1531   env["INSTANCE_DISK_COUNT"] = disk_count
1532
1533   if not tags:
1534     tags = []
1535
1536   env["INSTANCE_TAGS"] = " ".join(tags)
1537
1538   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539     for key, value in source.items():
1540       env["INSTANCE_%s_%s" % (kind, key)] = value
1541
1542   return env
1543
1544
1545 def _NICToTuple(lu, nic):
1546   """Build a tupple of nic information.
1547
1548   @type lu:  L{LogicalUnit}
1549   @param lu: the logical unit on whose behalf we execute
1550   @type nic: L{objects.NIC}
1551   @param nic: nic to convert to hooks tuple
1552
1553   """
1554   cluster = lu.cfg.GetClusterInfo()
1555   filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556   mode = filled_params[constants.NIC_MODE]
1557   link = filled_params[constants.NIC_LINK]
1558   netinfo = None
1559   if nic.network:
1560     nobj = lu.cfg.GetNetwork(nic.network)
1561     netinfo = objects.Network.ToDict(nobj)
1562   return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1563
1564
1565 def _NICListToTuple(lu, nics):
1566   """Build a list of nic information tuples.
1567
1568   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1569   value in LUInstanceQueryData.
1570
1571   @type lu:  L{LogicalUnit}
1572   @param lu: the logical unit on whose behalf we execute
1573   @type nics: list of L{objects.NIC}
1574   @param nics: list of nics to convert to hooks tuples
1575
1576   """
1577   hooks_nics = []
1578   for nic in nics:
1579     hooks_nics.append(_NICToTuple(lu, nic))
1580   return hooks_nics
1581
1582
1583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1584   """Builds instance related env variables for hooks from an object.
1585
1586   @type lu: L{LogicalUnit}
1587   @param lu: the logical unit on whose behalf we execute
1588   @type instance: L{objects.Instance}
1589   @param instance: the instance for which we should build the
1590       environment
1591   @type override: dict
1592   @param override: dictionary with key/values that will override
1593       our values
1594   @rtype: dict
1595   @return: the hook environment dictionary
1596
1597   """
1598   cluster = lu.cfg.GetClusterInfo()
1599   bep = cluster.FillBE(instance)
1600   hvp = cluster.FillHV(instance)
1601   args = {
1602     "name": instance.name,
1603     "primary_node": instance.primary_node,
1604     "secondary_nodes": instance.secondary_nodes,
1605     "os_type": instance.os,
1606     "status": instance.admin_state,
1607     "maxmem": bep[constants.BE_MAXMEM],
1608     "minmem": bep[constants.BE_MINMEM],
1609     "vcpus": bep[constants.BE_VCPUS],
1610     "nics": _NICListToTuple(lu, instance.nics),
1611     "disk_template": instance.disk_template,
1612     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1613     "bep": bep,
1614     "hvp": hvp,
1615     "hypervisor_name": instance.hypervisor,
1616     "tags": instance.tags,
1617   }
1618   if override:
1619     args.update(override)
1620   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1621
1622
1623 def _AdjustCandidatePool(lu, exceptions):
1624   """Adjust the candidate pool after node operations.
1625
1626   """
1627   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1628   if mod_list:
1629     lu.LogInfo("Promoted nodes to master candidate role: %s",
1630                utils.CommaJoin(node.name for node in mod_list))
1631     for name in mod_list:
1632       lu.context.ReaddNode(name)
1633   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1634   if mc_now > mc_max:
1635     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1636                (mc_now, mc_max))
1637
1638
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640   """Decide whether I should promote myself as a master candidate.
1641
1642   """
1643   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645   # the new node will increase mc_max with one, so:
1646   mc_should = min(mc_should + 1, cp_size)
1647   return mc_now < mc_should
1648
1649
1650 def _ComputeViolatingInstances(ipolicy, instances):
1651   """Computes a set of instances who violates given ipolicy.
1652
1653   @param ipolicy: The ipolicy to verify
1654   @type instances: object.Instance
1655   @param instances: List of instances to verify
1656   @return: A frozenset of instance names violating the ipolicy
1657
1658   """
1659   return frozenset([inst.name for inst in instances
1660                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1661
1662
1663 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1664   """Check that the brigdes needed by a list of nics exist.
1665
1666   """
1667   cluster = lu.cfg.GetClusterInfo()
1668   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1669   brlist = [params[constants.NIC_LINK] for params in paramslist
1670             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1671   if brlist:
1672     result = lu.rpc.call_bridges_exist(target_node, brlist)
1673     result.Raise("Error checking bridges on destination node '%s'" %
1674                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1675
1676
1677 def _CheckInstanceBridgesExist(lu, instance, node=None):
1678   """Check that the brigdes needed by an instance exist.
1679
1680   """
1681   if node is None:
1682     node = instance.primary_node
1683   _CheckNicsBridgesExist(lu, instance.nics, node)
1684
1685
1686 def _CheckOSVariant(os_obj, name):
1687   """Check whether an OS name conforms to the os variants specification.
1688
1689   @type os_obj: L{objects.OS}
1690   @param os_obj: OS object to check
1691   @type name: string
1692   @param name: OS name passed by the user, to check for validity
1693
1694   """
1695   variant = objects.OS.GetVariant(name)
1696   if not os_obj.supported_variants:
1697     if variant:
1698       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1699                                  " passed)" % (os_obj.name, variant),
1700                                  errors.ECODE_INVAL)
1701     return
1702   if not variant:
1703     raise errors.OpPrereqError("OS name must include a variant",
1704                                errors.ECODE_INVAL)
1705
1706   if variant not in os_obj.supported_variants:
1707     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1708
1709
1710 def _GetNodeInstancesInner(cfg, fn):
1711   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1712
1713
1714 def _GetNodeInstances(cfg, node_name):
1715   """Returns a list of all primary and secondary instances on a node.
1716
1717   """
1718
1719   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1720
1721
1722 def _GetNodePrimaryInstances(cfg, node_name):
1723   """Returns primary instances on a node.
1724
1725   """
1726   return _GetNodeInstancesInner(cfg,
1727                                 lambda inst: node_name == inst.primary_node)
1728
1729
1730 def _GetNodeSecondaryInstances(cfg, node_name):
1731   """Returns secondary instances on a node.
1732
1733   """
1734   return _GetNodeInstancesInner(cfg,
1735                                 lambda inst: node_name in inst.secondary_nodes)
1736
1737
1738 def _GetStorageTypeArgs(cfg, storage_type):
1739   """Returns the arguments for a storage type.
1740
1741   """
1742   # Special case for file storage
1743   if storage_type == constants.ST_FILE:
1744     # storage.FileStorage wants a list of storage directories
1745     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1746
1747   return []
1748
1749
1750 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1751   faulty = []
1752
1753   for dev in instance.disks:
1754     cfg.SetDiskID(dev, node_name)
1755
1756   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1757                                                                 instance))
1758   result.Raise("Failed to get disk status from node %s" % node_name,
1759                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1760
1761   for idx, bdev_status in enumerate(result.payload):
1762     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1763       faulty.append(idx)
1764
1765   return faulty
1766
1767
1768 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1769   """Check the sanity of iallocator and node arguments and use the
1770   cluster-wide iallocator if appropriate.
1771
1772   Check that at most one of (iallocator, node) is specified. If none is
1773   specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1774   then the LU's opcode's iallocator slot is filled with the cluster-wide
1775   default iallocator.
1776
1777   @type iallocator_slot: string
1778   @param iallocator_slot: the name of the opcode iallocator slot
1779   @type node_slot: string
1780   @param node_slot: the name of the opcode target node slot
1781
1782   """
1783   node = getattr(lu.op, node_slot, None)
1784   ialloc = getattr(lu.op, iallocator_slot, None)
1785   if node == []:
1786     node = None
1787
1788   if node is not None and ialloc is not None:
1789     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1790                                errors.ECODE_INVAL)
1791   elif ((node is None and ialloc is None) or
1792         ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1793     default_iallocator = lu.cfg.GetDefaultIAllocator()
1794     if default_iallocator:
1795       setattr(lu.op, iallocator_slot, default_iallocator)
1796     else:
1797       raise errors.OpPrereqError("No iallocator or node given and no"
1798                                  " cluster-wide default iallocator found;"
1799                                  " please specify either an iallocator or a"
1800                                  " node, or set a cluster-wide default"
1801                                  " iallocator", errors.ECODE_INVAL)
1802
1803
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805   """Decides on which iallocator to use.
1806
1807   @type cfg: L{config.ConfigWriter}
1808   @param cfg: Cluster configuration object
1809   @type ialloc: string or None
1810   @param ialloc: Iallocator specified in opcode
1811   @rtype: string
1812   @return: Iallocator name
1813
1814   """
1815   if not ialloc:
1816     # Use default iallocator
1817     ialloc = cfg.GetDefaultIAllocator()
1818
1819   if not ialloc:
1820     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821                                " opcode nor as a cluster-wide default",
1822                                errors.ECODE_INVAL)
1823
1824   return ialloc
1825
1826
1827 def _CheckHostnameSane(lu, name):
1828   """Ensures that a given hostname resolves to a 'sane' name.
1829
1830   The given name is required to be a prefix of the resolved hostname,
1831   to prevent accidental mismatches.
1832
1833   @param lu: the logical unit on behalf of which we're checking
1834   @param name: the name we should resolve and check
1835   @return: the resolved hostname object
1836
1837   """
1838   hostname = netutils.GetHostname(name=name)
1839   if hostname.name != name:
1840     lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1841   if not utils.MatchNameComponent(name, [hostname.name]):
1842     raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1843                                 " same as given hostname '%s'") %
1844                                 (hostname.name, name), errors.ECODE_INVAL)
1845   return hostname
1846
1847
1848 class LUClusterPostInit(LogicalUnit):
1849   """Logical unit for running hooks after cluster initialization.
1850
1851   """
1852   HPATH = "cluster-init"
1853   HTYPE = constants.HTYPE_CLUSTER
1854
1855   def BuildHooksEnv(self):
1856     """Build hooks env.
1857
1858     """
1859     return {
1860       "OP_TARGET": self.cfg.GetClusterName(),
1861       }
1862
1863   def BuildHooksNodes(self):
1864     """Build hooks nodes.
1865
1866     """
1867     return ([], [self.cfg.GetMasterNode()])
1868
1869   def Exec(self, feedback_fn):
1870     """Nothing to do.
1871
1872     """
1873     return True
1874
1875
1876 class LUClusterDestroy(LogicalUnit):
1877   """Logical unit for destroying the cluster.
1878
1879   """
1880   HPATH = "cluster-destroy"
1881   HTYPE = constants.HTYPE_CLUSTER
1882
1883   def BuildHooksEnv(self):
1884     """Build hooks env.
1885
1886     """
1887     return {
1888       "OP_TARGET": self.cfg.GetClusterName(),
1889       }
1890
1891   def BuildHooksNodes(self):
1892     """Build hooks nodes.
1893
1894     """
1895     return ([], [])
1896
1897   def CheckPrereq(self):
1898     """Check prerequisites.
1899
1900     This checks whether the cluster is empty.
1901
1902     Any errors are signaled by raising errors.OpPrereqError.
1903
1904     """
1905     master = self.cfg.GetMasterNode()
1906
1907     nodelist = self.cfg.GetNodeList()
1908     if len(nodelist) != 1 or nodelist[0] != master:
1909       raise errors.OpPrereqError("There are still %d node(s) in"
1910                                  " this cluster." % (len(nodelist) - 1),
1911                                  errors.ECODE_INVAL)
1912     instancelist = self.cfg.GetInstanceList()
1913     if instancelist:
1914       raise errors.OpPrereqError("There are still %d instance(s) in"
1915                                  " this cluster." % len(instancelist),
1916                                  errors.ECODE_INVAL)
1917
1918   def Exec(self, feedback_fn):
1919     """Destroys the cluster.
1920
1921     """
1922     master_params = self.cfg.GetMasterNetworkParameters()
1923
1924     # Run post hooks on master node before it's removed
1925     _RunPostHook(self, master_params.name)
1926
1927     ems = self.cfg.GetUseExternalMipScript()
1928     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1929                                                      master_params, ems)
1930     if result.fail_msg:
1931       self.LogWarning("Error disabling the master IP address: %s",
1932                       result.fail_msg)
1933
1934     return master_params.name
1935
1936
1937 def _VerifyCertificate(filename):
1938   """Verifies a certificate for L{LUClusterVerifyConfig}.
1939
1940   @type filename: string
1941   @param filename: Path to PEM file
1942
1943   """
1944   try:
1945     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1946                                            utils.ReadFile(filename))
1947   except Exception, err: # pylint: disable=W0703
1948     return (LUClusterVerifyConfig.ETYPE_ERROR,
1949             "Failed to load X509 certificate %s: %s" % (filename, err))
1950
1951   (errcode, msg) = \
1952     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1953                                 constants.SSL_CERT_EXPIRATION_ERROR)
1954
1955   if msg:
1956     fnamemsg = "While verifying %s: %s" % (filename, msg)
1957   else:
1958     fnamemsg = None
1959
1960   if errcode is None:
1961     return (None, fnamemsg)
1962   elif errcode == utils.CERT_WARNING:
1963     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1964   elif errcode == utils.CERT_ERROR:
1965     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1966
1967   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1968
1969
1970 def _GetAllHypervisorParameters(cluster, instances):
1971   """Compute the set of all hypervisor parameters.
1972
1973   @type cluster: L{objects.Cluster}
1974   @param cluster: the cluster object
1975   @param instances: list of L{objects.Instance}
1976   @param instances: additional instances from which to obtain parameters
1977   @rtype: list of (origin, hypervisor, parameters)
1978   @return: a list with all parameters found, indicating the hypervisor they
1979        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1980
1981   """
1982   hvp_data = []
1983
1984   for hv_name in cluster.enabled_hypervisors:
1985     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1986
1987   for os_name, os_hvp in cluster.os_hvp.items():
1988     for hv_name, hv_params in os_hvp.items():
1989       if hv_params:
1990         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1992
1993   # TODO: collapse identical parameter values in a single one
1994   for instance in instances:
1995     if instance.hvparams:
1996       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997                        cluster.FillHV(instance)))
1998
1999   return hvp_data
2000
2001
2002 class _VerifyErrors(object):
2003   """Mix-in for cluster/group verify LUs.
2004
2005   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006   self.op and self._feedback_fn to be available.)
2007
2008   """
2009
2010   ETYPE_FIELD = "code"
2011   ETYPE_ERROR = "ERROR"
2012   ETYPE_WARNING = "WARNING"
2013
2014   def _Error(self, ecode, item, msg, *args, **kwargs):
2015     """Format an error message.
2016
2017     Based on the opcode's error_codes parameter, either format a
2018     parseable error code, or a simpler error string.
2019
2020     This must be called only from Exec and functions called from Exec.
2021
2022     """
2023     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024     itype, etxt, _ = ecode
2025     # If the error code is in the list of ignored errors, demote the error to a
2026     # warning
2027     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
2028       ltype = self.ETYPE_WARNING
2029     # first complete the msg
2030     if args:
2031       msg = msg % args
2032     # then format the whole message
2033     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2035     else:
2036       if item:
2037         item = " " + item
2038       else:
2039         item = ""
2040       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041     # and finally report it via the feedback_fn
2042     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
2043     # do not mark the operation as failed for WARN cases only
2044     if ltype == self.ETYPE_ERROR:
2045       self.bad = True
2046
2047   def _ErrorIf(self, cond, *args, **kwargs):
2048     """Log an error message if the passed condition is True.
2049
2050     """
2051     if (bool(cond)
2052         or self.op.debug_simulate_errors): # pylint: disable=E1101
2053       self._Error(*args, **kwargs)
2054
2055
2056 class LUClusterVerify(NoHooksLU):
2057   """Submits all jobs necessary to verify the cluster.
2058
2059   """
2060   REQ_BGL = False
2061
2062   def ExpandNames(self):
2063     self.needed_locks = {}
2064
2065   def Exec(self, feedback_fn):
2066     jobs = []
2067
2068     if self.op.group_name:
2069       groups = [self.op.group_name]
2070       depends_fn = lambda: None
2071     else:
2072       groups = self.cfg.GetNodeGroupList()
2073
2074       # Verify global configuration
2075       jobs.append([
2076         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2077         ])
2078
2079       # Always depend on global verification
2080       depends_fn = lambda: [(-len(jobs), [])]
2081
2082     jobs.extend(
2083       [opcodes.OpClusterVerifyGroup(group_name=group,
2084                                     ignore_errors=self.op.ignore_errors,
2085                                     depends=depends_fn())]
2086       for group in groups)
2087
2088     # Fix up all parameters
2089     for op in itertools.chain(*jobs): # pylint: disable=W0142
2090       op.debug_simulate_errors = self.op.debug_simulate_errors
2091       op.verbose = self.op.verbose
2092       op.error_codes = self.op.error_codes
2093       try:
2094         op.skip_checks = self.op.skip_checks
2095       except AttributeError:
2096         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2097
2098     return ResultWithJobs(jobs)
2099
2100
2101 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2102   """Verifies the cluster config.
2103
2104   """
2105   REQ_BGL = False
2106
2107   def _VerifyHVP(self, hvp_data):
2108     """Verifies locally the syntax of the hypervisor parameters.
2109
2110     """
2111     for item, hv_name, hv_params in hvp_data:
2112       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2113              (item, hv_name))
2114       try:
2115         hv_class = hypervisor.GetHypervisorClass(hv_name)
2116         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2117         hv_class.CheckParameterSyntax(hv_params)
2118       except errors.GenericError, err:
2119         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2120
2121   def ExpandNames(self):
2122     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2123     self.share_locks = _ShareAll()
2124
2125   def CheckPrereq(self):
2126     """Check prerequisites.
2127
2128     """
2129     # Retrieve all information
2130     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2131     self.all_node_info = self.cfg.GetAllNodesInfo()
2132     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2133
2134   def Exec(self, feedback_fn):
2135     """Verify integrity of cluster, performing various test on nodes.
2136
2137     """
2138     self.bad = False
2139     self._feedback_fn = feedback_fn
2140
2141     feedback_fn("* Verifying cluster config")
2142
2143     for msg in self.cfg.VerifyConfig():
2144       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2145
2146     feedback_fn("* Verifying cluster certificate files")
2147
2148     for cert_filename in pathutils.ALL_CERT_FILES:
2149       (errcode, msg) = _VerifyCertificate(cert_filename)
2150       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2151
2152     feedback_fn("* Verifying hypervisor parameters")
2153
2154     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2155                                                 self.all_inst_info.values()))
2156
2157     feedback_fn("* Verifying all nodes belong to an existing group")
2158
2159     # We do this verification here because, should this bogus circumstance
2160     # occur, it would never be caught by VerifyGroup, which only acts on
2161     # nodes/instances reachable from existing node groups.
2162
2163     dangling_nodes = set(node.name for node in self.all_node_info.values()
2164                          if node.group not in self.all_group_info)
2165
2166     dangling_instances = {}
2167     no_node_instances = []
2168
2169     for inst in self.all_inst_info.values():
2170       if inst.primary_node in dangling_nodes:
2171         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2172       elif inst.primary_node not in self.all_node_info:
2173         no_node_instances.append(inst.name)
2174
2175     pretty_dangling = [
2176         "%s (%s)" %
2177         (node.name,
2178          utils.CommaJoin(dangling_instances.get(node.name,
2179                                                 ["no instances"])))
2180         for node in dangling_nodes]
2181
2182     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2183                   None,
2184                   "the following nodes (and their instances) belong to a non"
2185                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2186
2187     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2188                   None,
2189                   "the following instances have a non-existing primary-node:"
2190                   " %s", utils.CommaJoin(no_node_instances))
2191
2192     return not self.bad
2193
2194
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196   """Verifies the status of a node group.
2197
2198   """
2199   HPATH = "cluster-verify"
2200   HTYPE = constants.HTYPE_CLUSTER
2201   REQ_BGL = False
2202
2203   _HOOKS_INDENT_RE = re.compile("^", re.M)
2204
2205   class NodeImage(object):
2206     """A class representing the logical and physical status of a node.
2207
2208     @type name: string
2209     @ivar name: the node name to which this object refers
2210     @ivar volumes: a structure as returned from
2211         L{ganeti.backend.GetVolumeList} (runtime)
2212     @ivar instances: a list of running instances (runtime)
2213     @ivar pinst: list of configured primary instances (config)
2214     @ivar sinst: list of configured secondary instances (config)
2215     @ivar sbp: dictionary of {primary-node: list of instances} for all
2216         instances for which this node is secondary (config)
2217     @ivar mfree: free memory, as reported by hypervisor (runtime)
2218     @ivar dfree: free disk, as reported by the node (runtime)
2219     @ivar offline: the offline status (config)
2220     @type rpc_fail: boolean
2221     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2222         not whether the individual keys were correct) (runtime)
2223     @type lvm_fail: boolean
2224     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225     @type hyp_fail: boolean
2226     @ivar hyp_fail: whether the RPC call didn't return the instance list
2227     @type ghost: boolean
2228     @ivar ghost: whether this is a known node or not (config)
2229     @type os_fail: boolean
2230     @ivar os_fail: whether the RPC call didn't return valid OS data
2231     @type oslist: list
2232     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233     @type vm_capable: boolean
2234     @ivar vm_capable: whether the node can host instances
2235     @type pv_min: float
2236     @ivar pv_min: size in MiB of the smallest PVs
2237     @type pv_max: float
2238     @ivar pv_max: size in MiB of the biggest PVs
2239
2240     """
2241     def __init__(self, offline=False, name=None, vm_capable=True):
2242       self.name = name
2243       self.volumes = {}
2244       self.instances = []
2245       self.pinst = []
2246       self.sinst = []
2247       self.sbp = {}
2248       self.mfree = 0
2249       self.dfree = 0
2250       self.offline = offline
2251       self.vm_capable = vm_capable
2252       self.rpc_fail = False
2253       self.lvm_fail = False
2254       self.hyp_fail = False
2255       self.ghost = False
2256       self.os_fail = False
2257       self.oslist = {}
2258       self.pv_min = None
2259       self.pv_max = None
2260
2261   def ExpandNames(self):
2262     # This raises errors.OpPrereqError on its own:
2263     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2264
2265     # Get instances in node group; this is unsafe and needs verification later
2266     inst_names = \
2267       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2268
2269     self.needed_locks = {
2270       locking.LEVEL_INSTANCE: inst_names,
2271       locking.LEVEL_NODEGROUP: [self.group_uuid],
2272       locking.LEVEL_NODE: [],
2273
2274       # This opcode is run by watcher every five minutes and acquires all nodes
2275       # for a group. It doesn't run for a long time, so it's better to acquire
2276       # the node allocation lock as well.
2277       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2278       }
2279
2280     self.share_locks = _ShareAll()
2281
2282   def DeclareLocks(self, level):
2283     if level == locking.LEVEL_NODE:
2284       # Get members of node group; this is unsafe and needs verification later
2285       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2286
2287       all_inst_info = self.cfg.GetAllInstancesInfo()
2288
2289       # In Exec(), we warn about mirrored instances that have primary and
2290       # secondary living in separate node groups. To fully verify that
2291       # volumes for these instances are healthy, we will need to do an
2292       # extra call to their secondaries. We ensure here those nodes will
2293       # be locked.
2294       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295         # Important: access only the instances whose lock is owned
2296         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297           nodes.update(all_inst_info[inst].secondary_nodes)
2298
2299       self.needed_locks[locking.LEVEL_NODE] = nodes
2300
2301   def CheckPrereq(self):
2302     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2304
2305     group_nodes = set(self.group_info.members)
2306     group_instances = \
2307       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2308
2309     unlocked_nodes = \
2310         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2311
2312     unlocked_instances = \
2313         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2314
2315     if unlocked_nodes:
2316       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317                                  utils.CommaJoin(unlocked_nodes),
2318                                  errors.ECODE_STATE)
2319
2320     if unlocked_instances:
2321       raise errors.OpPrereqError("Missing lock for instances: %s" %
2322                                  utils.CommaJoin(unlocked_instances),
2323                                  errors.ECODE_STATE)
2324
2325     self.all_node_info = self.cfg.GetAllNodesInfo()
2326     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2327
2328     self.my_node_names = utils.NiceSort(group_nodes)
2329     self.my_inst_names = utils.NiceSort(group_instances)
2330
2331     self.my_node_info = dict((name, self.all_node_info[name])
2332                              for name in self.my_node_names)
2333
2334     self.my_inst_info = dict((name, self.all_inst_info[name])
2335                              for name in self.my_inst_names)
2336
2337     # We detect here the nodes that will need the extra RPC calls for verifying
2338     # split LV volumes; they should be locked.
2339     extra_lv_nodes = set()
2340
2341     for inst in self.my_inst_info.values():
2342       if inst.disk_template in constants.DTS_INT_MIRROR:
2343         for nname in inst.all_nodes:
2344           if self.all_node_info[nname].group != self.group_uuid:
2345             extra_lv_nodes.add(nname)
2346
2347     unlocked_lv_nodes = \
2348         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2349
2350     if unlocked_lv_nodes:
2351       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352                                  utils.CommaJoin(unlocked_lv_nodes),
2353                                  errors.ECODE_STATE)
2354     self.extra_lv_nodes = list(extra_lv_nodes)
2355
2356   def _VerifyNode(self, ninfo, nresult):
2357     """Perform some basic validation on data returned from a node.
2358
2359       - check the result data structure is well formed and has all the
2360         mandatory fields
2361       - check ganeti version
2362
2363     @type ninfo: L{objects.Node}
2364     @param ninfo: the node to check
2365     @param nresult: the results from the node
2366     @rtype: boolean
2367     @return: whether overall this call was successful (and we can expect
2368          reasonable values in the respose)
2369
2370     """
2371     node = ninfo.name
2372     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2373
2374     # main result, nresult should be a non-empty dict
2375     test = not nresult or not isinstance(nresult, dict)
2376     _ErrorIf(test, constants.CV_ENODERPC, node,
2377                   "unable to verify node: no data returned")
2378     if test:
2379       return False
2380
2381     # compares ganeti version
2382     local_version = constants.PROTOCOL_VERSION
2383     remote_version = nresult.get("version", None)
2384     test = not (remote_version and
2385                 isinstance(remote_version, (list, tuple)) and
2386                 len(remote_version) == 2)
2387     _ErrorIf(test, constants.CV_ENODERPC, node,
2388              "connection to node returned invalid data")
2389     if test:
2390       return False
2391
2392     test = local_version != remote_version[0]
2393     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394              "incompatible protocol versions: master %s,"
2395              " node %s", local_version, remote_version[0])
2396     if test:
2397       return False
2398
2399     # node seems compatible, we can actually try to look into its results
2400
2401     # full package version
2402     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403                   constants.CV_ENODEVERSION, node,
2404                   "software version mismatch: master %s, node %s",
2405                   constants.RELEASE_VERSION, remote_version[1],
2406                   code=self.ETYPE_WARNING)
2407
2408     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409     if ninfo.vm_capable and isinstance(hyp_result, dict):
2410       for hv_name, hv_result in hyp_result.iteritems():
2411         test = hv_result is not None
2412         _ErrorIf(test, constants.CV_ENODEHV, node,
2413                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2414
2415     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416     if ninfo.vm_capable and isinstance(hvp_result, list):
2417       for item, hv_name, hv_result in hvp_result:
2418         _ErrorIf(True, constants.CV_ENODEHV, node,
2419                  "hypervisor %s parameter verify failure (source %s): %s",
2420                  hv_name, item, hv_result)
2421
2422     test = nresult.get(constants.NV_NODESETUP,
2423                        ["Missing NODESETUP results"])
2424     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2425              "; ".join(test))
2426
2427     return True
2428
2429   def _VerifyNodeTime(self, ninfo, nresult,
2430                       nvinfo_starttime, nvinfo_endtime):
2431     """Check the node time.
2432
2433     @type ninfo: L{objects.Node}
2434     @param ninfo: the node to check
2435     @param nresult: the remote results for the node
2436     @param nvinfo_starttime: the start time of the RPC call
2437     @param nvinfo_endtime: the end time of the RPC call
2438
2439     """
2440     node = ninfo.name
2441     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2442
2443     ntime = nresult.get(constants.NV_TIME, None)
2444     try:
2445       ntime_merged = utils.MergeTime(ntime)
2446     except (ValueError, TypeError):
2447       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2448       return
2449
2450     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2454     else:
2455       ntime_diff = None
2456
2457     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458              "Node time diverges by at least %s from master node time",
2459              ntime_diff)
2460
2461   def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462     """Check the node LVM results and update info for cross-node checks.
2463
2464     @type ninfo: L{objects.Node}
2465     @param ninfo: the node to check
2466     @param nresult: the remote results for the node
2467     @param vg_name: the configured VG name
2468     @type nimg: L{NodeImage}
2469     @param nimg: node image
2470
2471     """
2472     if vg_name is None:
2473       return
2474
2475     node = ninfo.name
2476     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2477
2478     # checks vg existence and size > 20G
2479     vglist = nresult.get(constants.NV_VGLIST, None)
2480     test = not vglist
2481     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2482     if not test:
2483       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484                                             constants.MIN_VG_SIZE)
2485       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2486
2487     # Check PVs
2488     (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2489     for em in errmsgs:
2490       self._Error(constants.CV_ENODELVM, node, em)
2491     if pvminmax is not None:
2492       (nimg.pv_min, nimg.pv_max) = pvminmax
2493
2494   def _VerifyGroupLVM(self, node_image, vg_name):
2495     """Check cross-node consistency in LVM.
2496
2497     @type node_image: dict
2498     @param node_image: info about nodes, mapping from node to names to
2499       L{NodeImage} objects
2500     @param vg_name: the configured VG name
2501
2502     """
2503     if vg_name is None:
2504       return
2505
2506     # Only exlcusive storage needs this kind of checks
2507     if not self._exclusive_storage:
2508       return
2509
2510     # exclusive_storage wants all PVs to have the same size (approximately),
2511     # if the smallest and the biggest ones are okay, everything is fine.
2512     # pv_min is None iff pv_max is None
2513     vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2514     if not vals:
2515       return
2516     (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517     (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518     bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519     self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520                   "PV sizes differ too much in the group; smallest (%s MB) is"
2521                   " on %s, biggest (%s MB) is on %s",
2522                   pvmin, minnode, pvmax, maxnode)
2523
2524   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525     """Check the node bridges.
2526
2527     @type ninfo: L{objects.Node}
2528     @param ninfo: the node to check
2529     @param nresult: the remote results for the node
2530     @param bridges: the expected list of bridges
2531
2532     """
2533     if not bridges:
2534       return
2535
2536     node = ninfo.name
2537     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2538
2539     missing = nresult.get(constants.NV_BRIDGES, None)
2540     test = not isinstance(missing, list)
2541     _ErrorIf(test, constants.CV_ENODENET, node,
2542              "did not return valid bridge information")
2543     if not test:
2544       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2546
2547   def _VerifyNodeUserScripts(self, ninfo, nresult):
2548     """Check the results of user scripts presence and executability on the node
2549
2550     @type ninfo: L{objects.Node}
2551     @param ninfo: the node to check
2552     @param nresult: the remote results for the node
2553
2554     """
2555     node = ninfo.name
2556
2557     test = not constants.NV_USERSCRIPTS in nresult
2558     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559                   "did not return user scripts information")
2560
2561     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2562     if not test:
2563       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564                     "user scripts not present or not executable: %s" %
2565                     utils.CommaJoin(sorted(broken_scripts)))
2566
2567   def _VerifyNodeNetwork(self, ninfo, nresult):
2568     """Check the node network connectivity results.
2569
2570     @type ninfo: L{objects.Node}
2571     @param ninfo: the node to check
2572     @param nresult: the remote results for the node
2573
2574     """
2575     node = ninfo.name
2576     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2577
2578     test = constants.NV_NODELIST not in nresult
2579     _ErrorIf(test, constants.CV_ENODESSH, node,
2580              "node hasn't returned node ssh connectivity data")
2581     if not test:
2582       if nresult[constants.NV_NODELIST]:
2583         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584           _ErrorIf(True, constants.CV_ENODESSH, node,
2585                    "ssh communication with node '%s': %s", a_node, a_msg)
2586
2587     test = constants.NV_NODENETTEST not in nresult
2588     _ErrorIf(test, constants.CV_ENODENET, node,
2589              "node hasn't returned node tcp connectivity data")
2590     if not test:
2591       if nresult[constants.NV_NODENETTEST]:
2592         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2593         for anode in nlist:
2594           _ErrorIf(True, constants.CV_ENODENET, node,
2595                    "tcp communication with node '%s': %s",
2596                    anode, nresult[constants.NV_NODENETTEST][anode])
2597
2598     test = constants.NV_MASTERIP not in nresult
2599     _ErrorIf(test, constants.CV_ENODENET, node,
2600              "node hasn't returned node master IP reachability data")
2601     if not test:
2602       if not nresult[constants.NV_MASTERIP]:
2603         if node == self.master_node:
2604           msg = "the master node cannot reach the master IP (not configured?)"
2605         else:
2606           msg = "cannot reach the master IP"
2607         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2608
2609   def _VerifyInstance(self, instance, inst_config, node_image,
2610                       diskstatus):
2611     """Verify an instance.
2612
2613     This function checks to see if the required block devices are
2614     available on the instance's node, and that the nodes are in the correct
2615     state.
2616
2617     """
2618     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619     pnode = inst_config.primary_node
2620     pnode_img = node_image[pnode]
2621     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2622
2623     node_vol_should = {}
2624     inst_config.MapLVsByNode(node_vol_should)
2625
2626     cluster = self.cfg.GetClusterInfo()
2627     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2628                                                             self.group_info)
2629     err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631              code=self.ETYPE_WARNING)
2632
2633     for node in node_vol_should:
2634       n_img = node_image[node]
2635       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636         # ignore missing volumes on offline or broken nodes
2637         continue
2638       for volume in node_vol_should[node]:
2639         test = volume not in n_img.volumes
2640         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641                  "volume %s missing on node %s", volume, node)
2642
2643     if inst_config.admin_state == constants.ADMINST_UP:
2644       test = instance not in pnode_img.instances and not pnode_img.offline
2645       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646                "instance not running on its primary node %s",
2647                pnode)
2648       _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649                "instance is marked as running and lives on offline node %s",
2650                pnode)
2651
2652     diskdata = [(nname, success, status, idx)
2653                 for (nname, disks) in diskstatus.items()
2654                 for idx, (success, status) in enumerate(disks)]
2655
2656     for nname, success, bdev_status, idx in diskdata:
2657       # the 'ghost node' construction in Exec() ensures that we have a
2658       # node here
2659       snode = node_image[nname]
2660       bad_snode = snode.ghost or snode.offline
2661       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662                not success and not bad_snode,
2663                constants.CV_EINSTANCEFAULTYDISK, instance,
2664                "couldn't retrieve status for disk/%s on %s: %s",
2665                idx, nname, bdev_status)
2666       _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668                constants.CV_EINSTANCEFAULTYDISK, instance,
2669                "disk/%s on %s is faulty", idx, nname)
2670
2671     _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672              constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673              " primary node failed", instance)
2674
2675     _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676              constants.CV_EINSTANCELAYOUT,
2677              instance, "instance has multiple secondary nodes: %s",
2678              utils.CommaJoin(inst_config.secondary_nodes),
2679              code=self.ETYPE_WARNING)
2680
2681     if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682       # Disk template not compatible with exclusive_storage: no instance
2683       # node should have the flag set
2684       es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685                                                      inst_config.all_nodes)
2686       es_nodes = [n for (n, es) in es_flags.items()
2687                   if es]
2688       _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689                "instance has template %s, which is not supported on nodes"
2690                " that have exclusive storage set: %s",
2691                inst_config.disk_template, utils.CommaJoin(es_nodes))
2692
2693     if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694       instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695       instance_groups = {}
2696
2697       for node in instance_nodes:
2698         instance_groups.setdefault(self.all_node_info[node].group,
2699                                    []).append(node)
2700
2701       pretty_list = [
2702         "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703         # Sort so that we always list the primary node first.
2704         for group, nodes in sorted(instance_groups.items(),
2705                                    key=lambda (_, nodes): pnode in nodes,
2706                                    reverse=True)]
2707
2708       self._ErrorIf(len(instance_groups) > 1,
2709                     constants.CV_EINSTANCESPLITGROUPS,
2710                     instance, "instance has primary and secondary nodes in"
2711                     " different groups: %s", utils.CommaJoin(pretty_list),
2712                     code=self.ETYPE_WARNING)
2713
2714     inst_nodes_offline = []
2715     for snode in inst_config.secondary_nodes:
2716       s_img = node_image[snode]
2717       _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718                snode, "instance %s, connection to secondary node failed",
2719                instance)
2720
2721       if s_img.offline:
2722         inst_nodes_offline.append(snode)
2723
2724     # warn that the instance lives on offline nodes
2725     _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726              "instance has offline secondary node(s) %s",
2727              utils.CommaJoin(inst_nodes_offline))
2728     # ... or ghost/non-vm_capable nodes
2729     for node in inst_config.all_nodes:
2730       _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731                instance, "instance lives on ghost node %s", node)
2732       _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733                instance, "instance lives on non-vm_capable node %s", node)
2734
2735   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736     """Verify if there are any unknown volumes in the cluster.
2737
2738     The .os, .swap and backup volumes are ignored. All other volumes are
2739     reported as unknown.
2740
2741     @type reserved: L{ganeti.utils.FieldSet}
2742     @param reserved: a FieldSet of reserved volume names
2743
2744     """
2745     for node, n_img in node_image.items():
2746       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747           self.all_node_info[node].group != self.group_uuid):
2748         # skip non-healthy nodes
2749         continue
2750       for volume in n_img.volumes:
2751         test = ((node not in node_vol_should or
2752                 volume not in node_vol_should[node]) and
2753                 not reserved.Matches(volume))
2754         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755                       "volume %s is unknown", volume)
2756
2757   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758     """Verify N+1 Memory Resilience.
2759
2760     Check that if one single node dies we can still start all the
2761     instances it was primary for.
2762
2763     """
2764     cluster_info = self.cfg.GetClusterInfo()
2765     for node, n_img in node_image.items():
2766       # This code checks that every node which is now listed as
2767       # secondary has enough memory to host all instances it is
2768       # supposed to should a single other node in the cluster fail.
2769       # FIXME: not ready for failover to an arbitrary node
2770       # FIXME: does not support file-backed instances
2771       # WARNING: we currently take into account down instances as well
2772       # as up ones, considering that even if they're down someone
2773       # might want to start them even in the event of a node failure.
2774       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775         # we're skipping nodes marked offline and nodes in other groups from
2776         # the N+1 warning, since most likely we don't have good memory
2777         # infromation from them; we already list instances living on such
2778         # nodes, and that's enough warning
2779         continue
2780       #TODO(dynmem): also consider ballooning out other instances
2781       for prinode, instances in n_img.sbp.items():
2782         needed_mem = 0
2783         for instance in instances:
2784           bep = cluster_info.FillBE(instance_cfg[instance])
2785           if bep[constants.BE_AUTO_BALANCE]:
2786             needed_mem += bep[constants.BE_MINMEM]
2787         test = n_img.mfree < needed_mem
2788         self._ErrorIf(test, constants.CV_ENODEN1, node,
2789                       "not enough memory to accomodate instance failovers"
2790                       " should node %s fail (%dMiB needed, %dMiB available)",
2791                       prinode, needed_mem, n_img.mfree)
2792
2793   @classmethod
2794   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795                    (files_all, files_opt, files_mc, files_vm)):
2796     """Verifies file checksums collected from all nodes.
2797
2798     @param errorif: Callback for reporting errors
2799     @param nodeinfo: List of L{objects.Node} objects
2800     @param master_node: Name of master node
2801     @param all_nvinfo: RPC results
2802
2803     """
2804     # Define functions determining which nodes to consider for a file
2805     files2nodefn = [
2806       (files_all, None),
2807       (files_mc, lambda node: (node.master_candidate or
2808                                node.name == master_node)),
2809       (files_vm, lambda node: node.vm_capable),
2810       ]
2811
2812     # Build mapping from filename to list of nodes which should have the file
2813     nodefiles = {}
2814     for (files, fn) in files2nodefn:
2815       if fn is None:
2816         filenodes = nodeinfo
2817       else:
2818         filenodes = filter(fn, nodeinfo)
2819       nodefiles.update((filename,
2820                         frozenset(map(operator.attrgetter("name"), filenodes)))
2821                        for filename in files)
2822
2823     assert set(nodefiles) == (files_all | files_mc | files_vm)
2824
2825     fileinfo = dict((filename, {}) for filename in nodefiles)
2826     ignore_nodes = set()
2827
2828     for node in nodeinfo:
2829       if node.offline:
2830         ignore_nodes.add(node.name)
2831         continue
2832
2833       nresult = all_nvinfo[node.name]
2834
2835       if nresult.fail_msg or not nresult.payload:
2836         node_files = None
2837       else:
2838         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2839         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840                           for (key, value) in fingerprints.items())
2841         del fingerprints
2842
2843       test = not (node_files and isinstance(node_files, dict))
2844       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845               "Node did not return file checksum data")
2846       if test:
2847         ignore_nodes.add(node.name)
2848         continue
2849
2850       # Build per-checksum mapping from filename to nodes having it
2851       for (filename, checksum) in node_files.items():
2852         assert filename in nodefiles
2853         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2854
2855     for (filename, checksums) in fileinfo.items():
2856       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2857
2858       # Nodes having the file
2859       with_file = frozenset(node_name
2860                             for nodes in fileinfo[filename].values()
2861                             for node_name in nodes) - ignore_nodes
2862
2863       expected_nodes = nodefiles[filename] - ignore_nodes
2864
2865       # Nodes missing file
2866       missing_file = expected_nodes - with_file
2867
2868       if filename in files_opt:
2869         # All or no nodes
2870         errorif(missing_file and missing_file != expected_nodes,
2871                 constants.CV_ECLUSTERFILECHECK, None,
2872                 "File %s is optional, but it must exist on all or no"
2873                 " nodes (not found on %s)",
2874                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2875       else:
2876         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877                 "File %s is missing from node(s) %s", filename,
2878                 utils.CommaJoin(utils.NiceSort(missing_file)))
2879
2880         # Warn if a node has a file it shouldn't
2881         unexpected = with_file - expected_nodes
2882         errorif(unexpected,
2883                 constants.CV_ECLUSTERFILECHECK, None,
2884                 "File %s should not exist on node(s) %s",
2885                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2886
2887       # See if there are multiple versions of the file
2888       test = len(checksums) > 1
2889       if test:
2890         variants = ["variant %s on %s" %
2891                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892                     for (idx, (checksum, nodes)) in
2893                       enumerate(sorted(checksums.items()))]
2894       else:
2895         variants = []
2896
2897       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898               "File %s found with %s different checksums (%s)",
2899               filename, len(checksums), "; ".join(variants))
2900
2901   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2902                       drbd_map):
2903     """Verifies and the node DRBD status.
2904
2905     @type ninfo: L{objects.Node}
2906     @param ninfo: the node to check
2907     @param nresult: the remote results for the node
2908     @param instanceinfo: the dict of instances
2909     @param drbd_helper: the configured DRBD usermode helper
2910     @param drbd_map: the DRBD map as returned by
2911         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2912
2913     """
2914     node = ninfo.name
2915     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2916
2917     if drbd_helper:
2918       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919       test = (helper_result is None)
2920       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921                "no drbd usermode helper returned")
2922       if helper_result:
2923         status, payload = helper_result
2924         test = not status
2925         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926                  "drbd usermode helper check unsuccessful: %s", payload)
2927         test = status and (payload != drbd_helper)
2928         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929                  "wrong drbd usermode helper: %s", payload)
2930
2931     # compute the DRBD minors
2932     node_drbd = {}
2933     for minor, instance in drbd_map[node].items():
2934       test = instance not in instanceinfo
2935       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936                "ghost instance '%s' in temporary DRBD map", instance)
2937         # ghost instance should not be running, but otherwise we
2938         # don't give double warnings (both ghost instance and
2939         # unallocated minor in use)
2940       if test:
2941         node_drbd[minor] = (instance, False)
2942       else:
2943         instance = instanceinfo[instance]
2944         node_drbd[minor] = (instance.name,
2945                             instance.admin_state == constants.ADMINST_UP)
2946
2947     # and now check them
2948     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949     test = not isinstance(used_minors, (tuple, list))
2950     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951              "cannot parse drbd status file: %s", str(used_minors))
2952     if test:
2953       # we cannot check drbd status
2954       return
2955
2956     for minor, (iname, must_exist) in node_drbd.items():
2957       test = minor not in used_minors and must_exist
2958       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959                "drbd minor %d of instance %s is not active", minor, iname)
2960     for minor in used_minors:
2961       test = minor not in node_drbd
2962       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963                "unallocated drbd minor %d is in use", minor)
2964
2965   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966     """Builds the node OS structures.
2967
2968     @type ninfo: L{objects.Node}
2969     @param ninfo: the node to check
2970     @param nresult: the remote results for the node
2971     @param nimg: the node image object
2972
2973     """
2974     node = ninfo.name
2975     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2976
2977     remote_os = nresult.get(constants.NV_OSLIST, None)
2978     test = (not isinstance(remote_os, list) or
2979             not compat.all(isinstance(v, list) and len(v) == 7
2980                            for v in remote_os))
2981
2982     _ErrorIf(test, constants.CV_ENODEOS, node,
2983              "node hasn't returned valid OS data")
2984
2985     nimg.os_fail = test
2986
2987     if test:
2988       return
2989
2990     os_dict = {}
2991
2992     for (name, os_path, status, diagnose,
2993          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2994
2995       if name not in os_dict:
2996         os_dict[name] = []
2997
2998       # parameters is a list of lists instead of list of tuples due to
2999       # JSON lacking a real tuple type, fix it:
3000       parameters = [tuple(v) for v in parameters]
3001       os_dict[name].append((os_path, status, diagnose,
3002                             set(variants), set(parameters), set(api_ver)))
3003
3004     nimg.oslist = os_dict
3005
3006   def _VerifyNodeOS(self, ninfo, nimg, base):
3007     """Verifies the node OS list.
3008
3009     @type ninfo: L{objects.Node}
3010     @param ninfo: the node to check
3011     @param nimg: the node image object
3012     @param base: the 'template' node we match against (e.g. from the master)
3013
3014     """
3015     node = ninfo.name
3016     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3017
3018     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3019
3020     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021     for os_name, os_data in nimg.oslist.items():
3022       assert os_data, "Empty OS status for OS %s?!" % os_name
3023       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027                "OS '%s' has multiple entries (first one shadows the rest): %s",
3028                os_name, utils.CommaJoin([v[0] for v in os_data]))
3029       # comparisons with the 'base' image
3030       test = os_name not in base.oslist
3031       _ErrorIf(test, constants.CV_ENODEOS, node,
3032                "Extra OS %s not present on reference node (%s)",
3033                os_name, base.name)
3034       if test:
3035         continue
3036       assert base.oslist[os_name], "Base node has empty OS status?"
3037       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3038       if not b_status:
3039         # base OS is invalid, skipping
3040         continue
3041       for kind, a, b in [("API version", f_api, b_api),
3042                          ("variants list", f_var, b_var),
3043                          ("parameters", beautify_params(f_param),
3044                           beautify_params(b_param))]:
3045         _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047                  kind, os_name, base.name,
3048                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3049
3050     # check any missing OSes
3051     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052     _ErrorIf(missing, constants.CV_ENODEOS, node,
3053              "OSes present on reference node %s but missing on this node: %s",
3054              base.name, utils.CommaJoin(missing))
3055
3056   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3058
3059     @type ninfo: L{objects.Node}
3060     @param ninfo: the node to check
3061     @param nresult: the remote results for the node
3062     @type is_master: bool
3063     @param is_master: Whether node is the master node
3064
3065     """
3066     node = ninfo.name
3067
3068     if (is_master and
3069         (constants.ENABLE_FILE_STORAGE or
3070          constants.ENABLE_SHARED_FILE_STORAGE)):
3071       try:
3072         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3073       except KeyError:
3074         # This should never happen
3075         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076                       "Node did not return forbidden file storage paths")
3077       else:
3078         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079                       "Found forbidden file storage paths: %s",
3080                       utils.CommaJoin(fspaths))
3081     else:
3082       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083                     constants.CV_ENODEFILESTORAGEPATHS, node,
3084                     "Node should not have returned forbidden file storage"
3085                     " paths")
3086
3087   def _VerifyOob(self, ninfo, nresult):
3088     """Verifies out of band functionality of a node.
3089
3090     @type ninfo: L{objects.Node}
3091     @param ninfo: the node to check
3092     @param nresult: the remote results for the node
3093
3094     """
3095     node = ninfo.name
3096     # We just have to verify the paths on master and/or master candidates
3097     # as the oob helper is invoked on the master
3098     if ((ninfo.master_candidate or ninfo.master_capable) and
3099         constants.NV_OOB_PATHS in nresult):
3100       for path_result in nresult[constants.NV_OOB_PATHS]:
3101         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3102
3103   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104     """Verifies and updates the node volume data.
3105
3106     This function will update a L{NodeImage}'s internal structures
3107     with data from the remote call.
3108
3109     @type ninfo: L{objects.Node}
3110     @param ninfo: the node to check
3111     @param nresult: the remote results for the node
3112     @param nimg: the node image object
3113     @param vg_name: the configured VG name
3114
3115     """
3116     node = ninfo.name
3117     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3118
3119     nimg.lvm_fail = True
3120     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3121     if vg_name is None:
3122       pass
3123     elif isinstance(lvdata, basestring):
3124       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125                utils.SafeEncode(lvdata))
3126     elif not isinstance(lvdata, dict):
3127       _ErrorIf(True, constants.CV_ENODELVM, node,
3128                "rpc call to node failed (lvlist)")
3129     else:
3130       nimg.volumes = lvdata
3131       nimg.lvm_fail = False
3132
3133   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134     """Verifies and updates the node instance list.
3135
3136     If the listing was successful, then updates this node's instance
3137     list. Otherwise, it marks the RPC call as failed for the instance
3138     list key.
3139
3140     @type ninfo: L{objects.Node}
3141     @param ninfo: the node to check
3142     @param nresult: the remote results for the node
3143     @param nimg: the node image object
3144
3145     """
3146     idata = nresult.get(constants.NV_INSTANCELIST, None)
3147     test = not isinstance(idata, list)
3148     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149                   "rpc call to node failed (instancelist): %s",
3150                   utils.SafeEncode(str(idata)))
3151     if test:
3152       nimg.hyp_fail = True
3153     else:
3154       nimg.instances = idata
3155
3156   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3157     """Verifies and computes a node information map
3158
3159     @type ninfo: L{objects.Node}
3160     @param ninfo: the node to check
3161     @param nresult: the remote results for the node
3162     @param nimg: the node image object
3163     @param vg_name: the configured VG name
3164
3165     """
3166     node = ninfo.name
3167     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3168
3169     # try to read free memory (from the hypervisor)
3170     hv_info = nresult.get(constants.NV_HVINFO, None)
3171     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3172     _ErrorIf(test, constants.CV_ENODEHV, node,
3173              "rpc call to node failed (hvinfo)")
3174     if not test:
3175       try:
3176         nimg.mfree = int(hv_info["memory_free"])
3177       except (ValueError, TypeError):
3178         _ErrorIf(True, constants.CV_ENODERPC, node,
3179                  "node returned invalid nodeinfo, check hypervisor")
3180
3181     # FIXME: devise a free space model for file based instances as well
3182     if vg_name is not None:
3183       test = (constants.NV_VGLIST not in nresult or
3184               vg_name not in nresult[constants.NV_VGLIST])
3185       _ErrorIf(test, constants.CV_ENODELVM, node,
3186                "node didn't return data for the volume group '%s'"
3187                " - it is either missing or broken", vg_name)
3188       if not test:
3189         try:
3190           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3191         except (ValueError, TypeError):
3192           _ErrorIf(True, constants.CV_ENODERPC, node,
3193                    "node returned invalid LVM info, check LVM status")
3194
3195   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3196     """Gets per-disk status information for all instances.
3197
3198     @type nodelist: list of strings
3199     @param nodelist: Node names
3200     @type node_image: dict of (name, L{objects.Node})
3201     @param node_image: Node objects
3202     @type instanceinfo: dict of (name, L{objects.Instance})
3203     @param instanceinfo: Instance objects
3204     @rtype: {instance: {node: [(succes, payload)]}}
3205     @return: a dictionary of per-instance dictionaries with nodes as
3206         keys and disk information as values; the disk information is a
3207         list of tuples (success, payload)
3208
3209     """
3210     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3211
3212     node_disks = {}
3213     node_disks_devonly = {}
3214     diskless_instances = set()
3215     diskless = constants.DT_DISKLESS
3216
3217     for nname in nodelist:
3218       node_instances = list(itertools.chain(node_image[nname].pinst,
3219                                             node_image[nname].sinst))
3220       diskless_instances.update(inst for inst in node_instances
3221                                 if instanceinfo[inst].disk_template == diskless)
3222       disks = [(inst, disk)
3223                for inst in node_instances
3224                for disk in instanceinfo[inst].disks]
3225
3226       if not disks:
3227         # No need to collect data
3228         continue
3229
3230       node_disks[nname] = disks
3231
3232       # _AnnotateDiskParams makes already copies of the disks
3233       devonly = []
3234       for (inst, dev) in disks:
3235         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3236         self.cfg.SetDiskID(anno_disk, nname)
3237         devonly.append(anno_disk)
3238
3239       node_disks_devonly[nname] = devonly
3240
3241     assert len(node_disks) == len(node_disks_devonly)
3242
3243     # Collect data from all nodes with disks
3244     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3245                                                           node_disks_devonly)
3246
3247     assert len(result) == len(node_disks)
3248
3249     instdisk = {}
3250
3251     for (nname, nres) in result.items():
3252       disks = node_disks[nname]
3253
3254       if nres.offline:
3255         # No data from this node
3256         data = len(disks) * [(False, "node offline")]
3257       else:
3258         msg = nres.fail_msg
3259         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3260                  "while getting disk information: %s", msg)
3261         if msg:
3262           # No data from this node
3263           data = len(disks) * [(False, msg)]
3264         else:
3265           data = []
3266           for idx, i in enumerate(nres.payload):
3267             if isinstance(i, (tuple, list)) and len(i) == 2:
3268               data.append(i)
3269             else:
3270               logging.warning("Invalid result from node %s, entry %d: %s",
3271                               nname, idx, i)
3272               data.append((False, "Invalid result from the remote node"))
3273
3274       for ((inst, _), status) in zip(disks, data):
3275         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3276
3277     # Add empty entries for diskless instances.
3278     for inst in diskless_instances:
3279       assert inst not in instdisk
3280       instdisk[inst] = {}
3281
3282     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3283                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3284                       compat.all(isinstance(s, (tuple, list)) and
3285                                  len(s) == 2 for s in statuses)
3286                       for inst, nnames in instdisk.items()
3287                       for nname, statuses in nnames.items())
3288     if __debug__:
3289       instdisk_keys = set(instdisk)
3290       instanceinfo_keys = set(instanceinfo)
3291       assert instdisk_keys == instanceinfo_keys, \
3292         ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3293          (instdisk_keys, instanceinfo_keys))
3294
3295     return instdisk
3296
3297   @staticmethod
3298   def _SshNodeSelector(group_uuid, all_nodes):
3299     """Create endless iterators for all potential SSH check hosts.
3300
3301     """
3302     nodes = [node for node in all_nodes
3303              if (node.group != group_uuid and
3304                  not node.offline)]
3305     keyfunc = operator.attrgetter("group")
3306
3307     return map(itertools.cycle,
3308                [sorted(map(operator.attrgetter("name"), names))
3309                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3310                                                   keyfunc)])
3311
3312   @classmethod
3313   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3314     """Choose which nodes should talk to which other nodes.
3315
3316     We will make nodes contact all nodes in their group, and one node from
3317     every other group.
3318
3319     @warning: This algorithm has a known issue if one node group is much
3320       smaller than others (e.g. just one node). In such a case all other
3321       nodes will talk to the single node.
3322
3323     """
3324     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3325     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3326
3327     return (online_nodes,
3328             dict((name, sorted([i.next() for i in sel]))
3329                  for name in online_nodes))
3330
3331   def BuildHooksEnv(self):
3332     """Build hooks env.
3333
3334     Cluster-Verify hooks just ran in the post phase and their failure makes
3335     the output be logged in the verify output and the verification to fail.
3336
3337     """
3338     env = {
3339       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3340       }
3341
3342     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3343                for node in self.my_node_info.values())
3344
3345     return env
3346
3347   def BuildHooksNodes(self):
3348     """Build hooks nodes.
3349
3350     """
3351     return ([], self.my_node_names)
3352
3353   def Exec(self, feedback_fn):
3354     """Verify integrity of the node group, performing various test on nodes.
3355
3356     """
3357     # This method has too many local variables. pylint: disable=R0914
3358     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3359
3360     if not self.my_node_names:
3361       # empty node group
3362       feedback_fn("* Empty node group, skipping verification")
3363       return True
3364
3365     self.bad = False
3366     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3367     verbose = self.op.verbose
3368     self._feedback_fn = feedback_fn
3369
3370     vg_name = self.cfg.GetVGName()
3371     drbd_helper = self.cfg.GetDRBDHelper()
3372     cluster = self.cfg.GetClusterInfo()
3373     hypervisors = cluster.enabled_hypervisors
3374     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3375
3376     i_non_redundant = [] # Non redundant instances
3377     i_non_a_balanced = [] # Non auto-balanced instances
3378     i_offline = 0 # Count of offline instances
3379     n_offline = 0 # Count of offline nodes
3380     n_drained = 0 # Count of nodes being drained
3381     node_vol_should = {}
3382
3383     # FIXME: verify OS list
3384
3385     # File verification
3386     filemap = _ComputeAncillaryFiles(cluster, False)
3387
3388     # do local checksums
3389     master_node = self.master_node = self.cfg.GetMasterNode()
3390     master_ip = self.cfg.GetMasterIP()
3391
3392     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3393
3394     user_scripts = []
3395     if self.cfg.GetUseExternalMipScript():
3396       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3397
3398     node_verify_param = {
3399       constants.NV_FILELIST:
3400         map(vcluster.MakeVirtualPath,
3401             utils.UniqueSequence(filename
3402                                  for files in filemap
3403                                  for filename in files)),
3404       constants.NV_NODELIST:
3405         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3406                                   self.all_node_info.values()),
3407       constants.NV_HYPERVISOR: hypervisors,
3408       constants.NV_HVPARAMS:
3409         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3410       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3411                                  for node in node_data_list
3412                                  if not node.offline],
3413       constants.NV_INSTANCELIST: hypervisors,
3414       constants.NV_VERSION: None,
3415       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3416       constants.NV_NODESETUP: None,
3417       constants.NV_TIME: None,
3418       constants.NV_MASTERIP: (master_node, master_ip),
3419       constants.NV_OSLIST: None,
3420       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3421       constants.NV_USERSCRIPTS: user_scripts,
3422       }
3423
3424     if vg_name is not None:
3425       node_verify_param[constants.NV_VGLIST] = None
3426       node_verify_param[constants.NV_LVLIST] = vg_name
3427       node_verify_param[constants.NV_PVLIST] = [vg_name]
3428
3429     if drbd_helper:
3430       node_verify_param[constants.NV_DRBDLIST] = None
3431       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3432
3433     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3434       # Load file storage paths only from master node
3435       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3436
3437     # bridge checks
3438     # FIXME: this needs to be changed per node-group, not cluster-wide
3439     bridges = set()
3440     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3441     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442       bridges.add(default_nicpp[constants.NIC_LINK])
3443     for instance in self.my_inst_info.values():
3444       for nic in instance.nics:
3445         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3446         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447           bridges.add(full_nic[constants.NIC_LINK])
3448
3449     if bridges:
3450       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3451
3452     # Build our expected cluster state
3453     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3454                                                  name=node.name,
3455                                                  vm_capable=node.vm_capable))
3456                       for node in node_data_list)
3457
3458     # Gather OOB paths
3459     oob_paths = []
3460     for node in self.all_node_info.values():
3461       path = _SupportsOob(self.cfg, node)
3462       if path and path not in oob_paths:
3463         oob_paths.append(path)
3464
3465     if oob_paths:
3466       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3467
3468     for instance in self.my_inst_names:
3469       inst_config = self.my_inst_info[instance]
3470       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3471         i_offline += 1
3472
3473       for nname in inst_config.all_nodes:
3474         if nname not in node_image:
3475           gnode = self.NodeImage(name=nname)
3476           gnode.ghost = (nname not in self.all_node_info)
3477           node_image[nname] = gnode
3478
3479       inst_config.MapLVsByNode(node_vol_should)
3480
3481       pnode = inst_config.primary_node
3482       node_image[pnode].pinst.append(instance)
3483
3484       for snode in inst_config.secondary_nodes:
3485         nimg = node_image[snode]
3486         nimg.sinst.append(instance)
3487         if pnode not in nimg.sbp:
3488           nimg.sbp[pnode] = []
3489         nimg.sbp[pnode].append(instance)
3490
3491     es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3492     # The value of exclusive_storage should be the same across the group, so if
3493     # it's True for at least a node, we act as if it were set for all the nodes
3494     self._exclusive_storage = compat.any(es_flags.values())
3495     if self._exclusive_storage:
3496       node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3497
3498     # At this point, we have the in-memory data structures complete,
3499     # except for the runtime information, which we'll gather next
3500
3501     # Due to the way our RPC system works, exact response times cannot be
3502     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3503     # time before and after executing the request, we can at least have a time
3504     # window.
3505     nvinfo_starttime = time.time()
3506     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3507                                            node_verify_param,
3508                                            self.cfg.GetClusterName())
3509     nvinfo_endtime = time.time()
3510
3511     if self.extra_lv_nodes and vg_name is not None:
3512       extra_lv_nvinfo = \
3513           self.rpc.call_node_verify(self.extra_lv_nodes,
3514                                     {constants.NV_LVLIST: vg_name},
3515                                     self.cfg.GetClusterName())
3516     else:
3517       extra_lv_nvinfo = {}
3518
3519     all_drbd_map = self.cfg.ComputeDRBDMap()
3520
3521     feedback_fn("* Gathering disk information (%s nodes)" %
3522                 len(self.my_node_names))
3523     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3524                                      self.my_inst_info)
3525
3526     feedback_fn("* Verifying configuration file consistency")
3527
3528     # If not all nodes are being checked, we need to make sure the master node
3529     # and a non-checked vm_capable node are in the list.
3530     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3531     if absent_nodes:
3532       vf_nvinfo = all_nvinfo.copy()
3533       vf_node_info = list(self.my_node_info.values())
3534       additional_nodes = []
3535       if master_node not in self.my_node_info:
3536         additional_nodes.append(master_node)
3537         vf_node_info.append(self.all_node_info[master_node])
3538       # Add the first vm_capable node we find which is not included,
3539       # excluding the master node (which we already have)
3540       for node in absent_nodes:
3541         nodeinfo = self.all_node_info[node]
3542         if (nodeinfo.vm_capable and not nodeinfo.offline and
3543             node != master_node):
3544           additional_nodes.append(node)
3545           vf_node_info.append(self.all_node_info[node])
3546           break
3547       key = constants.NV_FILELIST
3548       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3549                                                  {key: node_verify_param[key]},
3550                                                  self.cfg.GetClusterName()))
3551     else:
3552       vf_nvinfo = all_nvinfo
3553       vf_node_info = self.my_node_info.values()
3554
3555     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3556
3557     feedback_fn("* Verifying node status")
3558
3559     refos_img = None
3560
3561     for node_i in node_data_list:
3562       node = node_i.name
3563       nimg = node_image[node]
3564
3565       if node_i.offline:
3566         if verbose:
3567           feedback_fn("* Skipping offline node %s" % (node,))
3568         n_offline += 1
3569         continue
3570
3571       if node == master_node:
3572         ntype = "master"
3573       elif node_i.master_candidate:
3574         ntype = "master candidate"
3575       elif node_i.drained:
3576         ntype = "drained"
3577         n_drained += 1
3578       else:
3579         ntype = "regular"
3580       if verbose:
3581         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3582
3583       msg = all_nvinfo[node].fail_msg
3584       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3585                msg)
3586       if msg:
3587         nimg.rpc_fail = True
3588         continue
3589
3590       nresult = all_nvinfo[node].payload
3591
3592       nimg.call_ok = self._VerifyNode(node_i, nresult)
3593       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3594       self._VerifyNodeNetwork(node_i, nresult)
3595       self._VerifyNodeUserScripts(node_i, nresult)
3596       self._VerifyOob(node_i, nresult)
3597       self._VerifyFileStoragePaths(node_i, nresult,
3598                                    node == master_node)
3599
3600       if nimg.vm_capable:
3601         self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3602         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3603                              all_drbd_map)
3604
3605         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3606         self._UpdateNodeInstances(node_i, nresult, nimg)
3607         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3608         self._UpdateNodeOS(node_i, nresult, nimg)
3609
3610         if not nimg.os_fail:
3611           if refos_img is None:
3612             refos_img = nimg
3613           self._VerifyNodeOS(node_i, nimg, refos_img)
3614         self._VerifyNodeBridges(node_i, nresult, bridges)
3615
3616         # Check whether all running instancies are primary for the node. (This
3617         # can no longer be done from _VerifyInstance below, since some of the
3618         # wrong instances could be from other node groups.)
3619         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3620
3621         for inst in non_primary_inst:
3622           test = inst in self.all_inst_info
3623           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3624                    "instance should not run on node %s", node_i.name)
3625           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3626                    "node is running unknown instance %s", inst)
3627
3628     self._VerifyGroupLVM(node_image, vg_name)
3629
3630     for node, result in extra_lv_nvinfo.items():
3631       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3632                               node_image[node], vg_name)
3633
3634     feedback_fn("* Verifying instance status")
3635     for instance in self.my_inst_names:
3636       if verbose:
3637         feedback_fn("* Verifying instance %s" % instance)
3638       inst_config = self.my_inst_info[instance]
3639       self._VerifyInstance(instance, inst_config, node_image,
3640                            instdisk[instance])
3641
3642       # If the instance is non-redundant we cannot survive losing its primary
3643       # node, so we are not N+1 compliant.
3644       if inst_config.disk_template not in constants.DTS_MIRRORED:
3645         i_non_redundant.append(instance)
3646
3647       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3648         i_non_a_balanced.append(instance)
3649
3650     feedback_fn("* Verifying orphan volumes")
3651     reserved = utils.FieldSet(*cluster.reserved_lvs)
3652
3653     # We will get spurious "unknown volume" warnings if any node of this group
3654     # is secondary for an instance whose primary is in another group. To avoid
3655     # them, we find these instances and add their volumes to node_vol_should.
3656     for inst in self.all_inst_info.values():
3657       for secondary in inst.secondary_nodes:
3658         if (secondary in self.my_node_info
3659             and inst.name not in self.my_inst_info):
3660           inst.MapLVsByNode(node_vol_should)
3661           break
3662
3663     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3664
3665     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3666       feedback_fn("* Verifying N+1 Memory redundancy")
3667       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3668
3669     feedback_fn("* Other Notes")
3670     if i_non_redundant:
3671       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3672                   % len(i_non_redundant))
3673
3674     if i_non_a_balanced:
3675       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3676                   % len(i_non_a_balanced))
3677
3678     if i_offline:
3679       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3680
3681     if n_offline:
3682       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3683
3684     if n_drained:
3685       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3686
3687     return not self.bad
3688
3689   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3690     """Analyze the post-hooks' result
3691
3692     This method analyses the hook result, handles it, and sends some
3693     nicely-formatted feedback back to the user.
3694
3695     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3696         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3697     @param hooks_results: the results of the multi-node hooks rpc call
3698     @param feedback_fn: function used send feedback back to the caller
3699     @param lu_result: previous Exec result
3700     @return: the new Exec result, based on the previous result
3701         and hook results
3702
3703     """
3704     # We only really run POST phase hooks, only for non-empty groups,
3705     # and are only interested in their results
3706     if not self.my_node_names:
3707       # empty node group
3708       pass
3709     elif phase == constants.HOOKS_PHASE_POST:
3710       # Used to change hooks' output to proper indentation
3711       feedback_fn("* Hooks Results")
3712       assert hooks_results, "invalid result from hooks"
3713
3714       for node_name in hooks_results:
3715         res = hooks_results[node_name]
3716         msg = res.fail_msg
3717         test = msg and not res.offline
3718         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3719                       "Communication failure in hooks execution: %s", msg)
3720         if res.offline or msg:
3721           # No need to investigate payload if node is offline or gave
3722           # an error.
3723           continue
3724         for script, hkr, output in res.payload:
3725           test = hkr == constants.HKR_FAIL
3726           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3727                         "Script %s failed, output:", script)
3728           if test:
3729             output = self._HOOKS_INDENT_RE.sub("      ", output)
3730             feedback_fn("%s" % output)
3731             lu_result = False
3732
3733     return lu_result
3734
3735
3736 class LUClusterVerifyDisks(NoHooksLU):
3737   """Verifies the cluster disks status.
3738
3739   """
3740   REQ_BGL = False
3741
3742   def ExpandNames(self):
3743     self.share_locks = _ShareAll()
3744     self.needed_locks = {
3745       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3746       }
3747
3748   def Exec(self, feedback_fn):
3749     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3750
3751     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3752     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3753                            for group in group_names])
3754
3755
3756 class LUGroupVerifyDisks(NoHooksLU):
3757   """Verifies the status of all disks in a node group.
3758
3759   """
3760   REQ_BGL = False
3761
3762   def ExpandNames(self):
3763     # Raises errors.OpPrereqError on its own if group can't be found
3764     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3765
3766     self.share_locks = _ShareAll()
3767     self.needed_locks = {
3768       locking.LEVEL_INSTANCE: [],
3769       locking.LEVEL_NODEGROUP: [],
3770       locking.LEVEL_NODE: [],
3771
3772       # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3773       # starts one instance of this opcode for every group, which means all
3774       # nodes will be locked for a short amount of time, so it's better to
3775       # acquire the node allocation lock as well.
3776       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3777       }
3778
3779   def DeclareLocks(self, level):
3780     if level == locking.LEVEL_INSTANCE:
3781       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3782
3783       # Lock instances optimistically, needs verification once node and group
3784       # locks have been acquired
3785       self.needed_locks[locking.LEVEL_INSTANCE] = \
3786         self.cfg.GetNodeGroupInstances(self.group_uuid)
3787
3788     elif level == locking.LEVEL_NODEGROUP:
3789       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3790
3791       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3792         set([self.group_uuid] +
3793             # Lock all groups used by instances optimistically; this requires
3794             # going via the node before it's locked, requiring verification
3795             # later on
3796             [group_uuid
3797              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3798              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3799
3800     elif level == locking.LEVEL_NODE:
3801       # This will only lock the nodes in the group to be verified which contain
3802       # actual instances
3803       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3804       self._LockInstancesNodes()
3805
3806       # Lock all nodes in group to be verified
3807       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3808       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3809       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3810
3811   def CheckPrereq(self):
3812     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3813     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3814     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3815
3816     assert self.group_uuid in owned_groups
3817
3818     # Check if locked instances are still correct
3819     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3820
3821     # Get instance information
3822     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3823
3824     # Check if node groups for locked instances are still correct
3825     _CheckInstancesNodeGroups(self.cfg, self.instances,
3826                               owned_groups, owned_nodes, self.group_uuid)
3827
3828   def Exec(self, feedback_fn):
3829     """Verify integrity of cluster disks.
3830
3831     @rtype: tuple of three items
3832     @return: a tuple of (dict of node-to-node_error, list of instances
3833         which need activate-disks, dict of instance: (node, volume) for
3834         missing volumes
3835
3836     """
3837     res_nodes = {}
3838     res_instances = set()
3839     res_missing = {}
3840
3841     nv_dict = _MapInstanceDisksToNodes(
3842       [inst for inst in self.instances.values()
3843        if inst.admin_state == constants.ADMINST_UP])
3844
3845     if nv_dict:
3846       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3847                              set(self.cfg.GetVmCapableNodeList()))
3848
3849       node_lvs = self.rpc.call_lv_list(nodes, [])
3850
3851       for (node, node_res) in node_lvs.items():
3852         if node_res.offline:
3853           continue
3854
3855         msg = node_res.fail_msg
3856         if msg:
3857           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3858           res_nodes[node] = msg
3859           continue
3860
3861         for lv_name, (_, _, lv_online) in node_res.payload.items():
3862           inst = nv_dict.pop((node, lv_name), None)
3863           if not (lv_online or inst is None):
3864             res_instances.add(inst)
3865
3866       # any leftover items in nv_dict are missing LVs, let's arrange the data
3867       # better
3868       for key, inst in nv_dict.iteritems():
3869         res_missing.setdefault(inst, []).append(list(key))
3870
3871     return (res_nodes, list(res_instances), res_missing)
3872
3873
3874 class LUClusterRepairDiskSizes(NoHooksLU):
3875   """Verifies the cluster disks sizes.
3876
3877   """
3878   REQ_BGL = False
3879
3880   def ExpandNames(self):
3881     if self.op.instances:
3882       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3883       # Not getting the node allocation lock as only a specific set of
3884       # instances (and their nodes) is going to be acquired
3885       self.needed_locks = {
3886         locking.LEVEL_NODE_RES: [],
3887         locking.LEVEL_INSTANCE: self.wanted_names,
3888         }
3889       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3890     else:
3891       self.wanted_names = None
3892       self.needed_locks = {
3893         locking.LEVEL_NODE_RES: locking.ALL_SET,
3894         locking.LEVEL_INSTANCE: locking.ALL_SET,
3895
3896         # This opcode is acquires the node locks for all instances
3897         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3898         }
3899
3900     self.share_locks = {
3901       locking.LEVEL_NODE_RES: 1,
3902       locking.LEVEL_INSTANCE: 0,
3903       locking.LEVEL_NODE_ALLOC: 1,
3904       }
3905
3906   def DeclareLocks(self, level):
3907     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3908       self._LockInstancesNodes(primary_only=True, level=level)
3909
3910   def CheckPrereq(self):
3911     """Check prerequisites.
3912
3913     This only checks the optional instance list against the existing names.
3914
3915     """
3916     if self.wanted_names is None:
3917       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3918
3919     self.wanted_instances = \
3920         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3921
3922   def _EnsureChildSizes(self, disk):
3923     """Ensure children of the disk have the needed disk size.
3924
3925     This is valid mainly for DRBD8 and fixes an issue where the
3926     children have smaller disk size.
3927
3928     @param disk: an L{ganeti.objects.Disk} object
3929
3930     """
3931     if disk.dev_type == constants.LD_DRBD8:
3932       assert disk.children, "Empty children for DRBD8?"
3933       fchild = disk.children[0]
3934       mismatch = fchild.size < disk.size
3935       if mismatch:
3936         self.LogInfo("Child disk has size %d, parent %d, fixing",
3937                      fchild.size, disk.size)
3938         fchild.size = disk.size
3939
3940       # and we recurse on this child only, not on the metadev
3941       return self._EnsureChildSizes(fchild) or mismatch
3942     else:
3943       return False
3944
3945   def Exec(self, feedback_fn):
3946     """Verify the size of cluster disks.
3947
3948     """
3949     # TODO: check child disks too
3950     # TODO: check differences in size between primary/secondary nodes
3951     per_node_disks = {}
3952     for instance in self.wanted_instances:
3953       pnode = instance.primary_node
3954       if pnode not in per_node_disks:
3955         per_node_disks[pnode] = []
3956       for idx, disk in enumerate(instance.disks):
3957         per_node_disks[pnode].append((instance, idx, disk))
3958
3959     assert not (frozenset(per_node_disks.keys()) -
3960                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3961       "Not owning correct locks"
3962     assert not self.owned_locks(locking.LEVEL_NODE)
3963
3964     changed = []
3965     for node, dskl in per_node_disks.items():
3966       newl = [v[2].Copy() for v in dskl]
3967       for dsk in newl:
3968         self.cfg.SetDiskID(dsk, node)
3969       result = self.rpc.call_blockdev_getsize(node, newl)
3970       if result.fail_msg:
3971         self.LogWarning("Failure in blockdev_getsize call to node"
3972                         " %s, ignoring", node)
3973         continue
3974       if len(result.payload) != len(dskl):
3975         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3976                         " result.payload=%s", node, len(dskl), result.payload)
3977         self.LogWarning("Invalid result from node %s, ignoring node results",
3978                         node)
3979         continue
3980       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3981         if size is None:
3982           self.LogWarning("Disk %d of instance %s did not return size"
3983                           " information, ignoring", idx, instance.name)
3984           continue
3985         if not isinstance(size, (int, long)):
3986           self.LogWarning("Disk %d of instance %s did not return valid"
3987                           " size information, ignoring", idx, instance.name)
3988           continue
3989         size = size >> 20
3990         if size != disk.size:
3991           self.LogInfo("Disk %d of instance %s has mismatched size,"
3992                        " correcting: recorded %d, actual %d", idx,
3993                        instance.name, disk.size, size)
3994           disk.size = size
3995           self.cfg.Update(instance, feedback_fn)
3996           changed.append((instance.name, idx, size))
3997         if self._EnsureChildSizes(disk):
3998           self.cfg.Update(instance, feedback_fn)
3999           changed.append((instance.name, idx, disk.size))
4000     return changed
4001
4002
4003 class LUClusterRename(LogicalUnit):
4004   """Rename the cluster.
4005
4006   """
4007   HPATH = "cluster-rename"
4008   HTYPE = constants.HTYPE_CLUSTER
4009
4010   def BuildHooksEnv(self):
4011     """Build hooks env.
4012
4013     """
4014     return {
4015       "OP_TARGET": self.cfg.GetClusterName(),
4016       "NEW_NAME": self.op.name,
4017       }
4018
4019   def BuildHooksNodes(self):
4020     """Build hooks nodes.
4021
4022     """
4023     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4024
4025   def CheckPrereq(self):
4026     """Verify that the passed name is a valid one.
4027
4028     """
4029     hostname = netutils.GetHostname(name=self.op.name,
4030                                     family=self.cfg.GetPrimaryIPFamily())
4031
4032     new_name = hostname.name
4033     self.ip = new_ip = hostname.ip
4034     old_name = self.cfg.GetClusterName()
4035     old_ip = self.cfg.GetMasterIP()
4036     if new_name == old_name and new_ip == old_ip:
4037       raise errors.OpPrereqError("Neither the name nor the IP address of the"
4038                                  " cluster has changed",
4039                                  errors.ECODE_INVAL)
4040     if new_ip != old_ip:
4041       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4042         raise errors.OpPrereqError("The given cluster IP address (%s) is"
4043                                    " reachable on the network" %
4044                                    new_ip, errors.ECODE_NOTUNIQUE)
4045
4046     self.op.name = new_name
4047
4048   def Exec(self, feedback_fn):
4049     """Rename the cluster.
4050
4051     """
4052     clustername = self.op.name
4053     new_ip = self.ip
4054
4055     # shutdown the master IP
4056     master_params = self.cfg.GetMasterNetworkParameters()
4057     ems = self.cfg.GetUseExternalMipScript()
4058     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4059                                                      master_params, ems)
4060     result.Raise("Could not disable the master role")
4061
4062     try:
4063       cluster = self.cfg.GetClusterInfo()
4064       cluster.cluster_name = clustername
4065       cluster.master_ip = new_ip
4066       self.cfg.Update(cluster, feedback_fn)
4067
4068       # update the known hosts file
4069       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4070       node_list = self.cfg.GetOnlineNodeList()
4071       try:
4072         node_list.remove(master_params.name)
4073       except ValueError:
4074         pass
4075       _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4076     finally:
4077       master_params.ip = new_ip
4078       result = self.rpc.call_node_activate_master_ip(master_params.name,
4079                                                      master_params, ems)
4080       msg = result.fail_msg
4081       if msg:
4082         self.LogWarning("Could not re-enable the master role on"
4083                         " the master, please restart manually: %s", msg)
4084
4085     return clustername
4086
4087
4088 def _ValidateNetmask(cfg, netmask):
4089   """Checks if a netmask is valid.
4090
4091   @type cfg: L{config.ConfigWriter}
4092   @param cfg: The cluster configuration
4093   @type netmask: int
4094   @param netmask: the netmask to be verified
4095   @raise errors.OpPrereqError: if the validation fails
4096
4097   """
4098   ip_family = cfg.GetPrimaryIPFamily()
4099   try:
4100     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4101   except errors.ProgrammerError:
4102     raise errors.OpPrereqError("Invalid primary ip family: %s." %
4103                                ip_family, errors.ECODE_INVAL)
4104   if not ipcls.ValidateNetmask(netmask):
4105     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4106                                 (netmask), errors.ECODE_INVAL)
4107
4108
4109 class LUClusterSetParams(LogicalUnit):
4110   """Change the parameters of the cluster.
4111
4112   """
4113   HPATH = "cluster-modify"
4114   HTYPE = constants.HTYPE_CLUSTER
4115   REQ_BGL = False
4116
4117   def CheckArguments(self):
4118     """Check parameters
4119
4120     """
4121     if self.op.uid_pool:
4122       uidpool.CheckUidPool(self.op.uid_pool)
4123
4124     if self.op.add_uids:
4125       uidpool.CheckUidPool(self.op.add_uids)
4126
4127     if self.op.remove_uids:
4128       uidpool.CheckUidPool(self.op.remove_uids)
4129
4130     if self.op.master_netmask is not None:
4131       _ValidateNetmask(self.cfg, self.op.master_netmask)
4132
4133     if self.op.diskparams:
4134       for dt_params in self.op.diskparams.values():
4135         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4136       try:
4137         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4138       except errors.OpPrereqError, err:
4139         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4140                                    errors.ECODE_INVAL)
4141
4142   def ExpandNames(self):
4143     # FIXME: in the future maybe other cluster params won't require checking on
4144     # all nodes to be modified.
4145     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4146     # resource locks the right thing, shouldn't it be the BGL instead?
4147     self.needed_locks = {
4148       locking.LEVEL_NODE: locking.ALL_SET,
4149       locking.LEVEL_INSTANCE: locking.ALL_SET,
4150       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4151       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4152     }
4153     self.share_locks = _ShareAll()
4154
4155   def BuildHooksEnv(self):
4156     """Build hooks env.
4157
4158     """
4159     return {
4160       "OP_TARGET": self.cfg.GetClusterName(),
4161       "NEW_VG_NAME": self.op.vg_name,
4162       }
4163
4164   def BuildHooksNodes(self):
4165     """Build hooks nodes.
4166
4167     """
4168     mn = self.cfg.GetMasterNode()
4169     return ([mn], [mn])
4170
4171   def CheckPrereq(self):
4172     """Check prerequisites.
4173
4174     This checks whether the given params don't conflict and
4175     if the given volume group is valid.
4176
4177     """
4178     if self.op.vg_name is not None and not self.op.vg_name:
4179       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181                                    " instances exist", errors.ECODE_INVAL)
4182
4183     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185         raise errors.OpPrereqError("Cannot disable drbd helper while"
4186                                    " drbd-based instances exist",
4187                                    errors.ECODE_INVAL)
4188
4189     node_list = self.owned_locks(locking.LEVEL_NODE)
4190
4191     # if vg_name not None, checks given volume group on all nodes
4192     if self.op.vg_name:
4193       vglist = self.rpc.call_vg_list(node_list)
4194       for node in node_list:
4195         msg = vglist[node].fail_msg
4196         if msg:
4197           # ignoring down node
4198           self.LogWarning("Error while gathering data on node %s"
4199                           " (ignoring node): %s", node, msg)
4200           continue
4201         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4202                                               self.op.vg_name,
4203                                               constants.MIN_VG_SIZE)
4204         if vgstatus:
4205           raise errors.OpPrereqError("Error on node '%s': %s" %
4206                                      (node, vgstatus), errors.ECODE_ENVIRON)
4207
4208     if self.op.drbd_helper:
4209       # checks given drbd helper on all nodes
4210       helpers = self.rpc.call_drbd_helper(node_list)
4211       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4212         if ninfo.offline:
4213           self.LogInfo("Not checking drbd helper on offline node %s", node)
4214           continue
4215         msg = helpers[node].fail_msg
4216         if msg:
4217           raise errors.OpPrereqError("Error checking drbd helper on node"
4218                                      " '%s': %s" % (node, msg),
4219                                      errors.ECODE_ENVIRON)
4220         node_helper = helpers[node].payload
4221         if node_helper != self.op.drbd_helper:
4222           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223                                      (node, node_helper), errors.ECODE_ENVIRON)
4224
4225     self.cluster = cluster = self.cfg.GetClusterInfo()
4226     # validate params changes
4227     if self.op.beparams:
4228       objects.UpgradeBeParams(self.op.beparams)
4229       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4231
4232     if self.op.ndparams:
4233       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4235
4236       # TODO: we need a more general way to handle resetting
4237       # cluster-level parameters to default values
4238       if self.new_ndparams["oob_program"] == "":
4239         self.new_ndparams["oob_program"] = \
4240             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4241
4242     if self.op.hv_state:
4243       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244                                             self.cluster.hv_state_static)
4245       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246                                for hv, values in new_hv_state.items())
4247
4248     if self.op.disk_state:
4249       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250                                                 self.cluster.disk_state_static)
4251       self.new_disk_state = \
4252         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253                             for name, values in svalues.items()))
4254              for storage, svalues in new_disk_state.items())
4255
4256     if self.op.ipolicy:
4257       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4258                                             group_policy=False)
4259
4260       all_instances = self.cfg.GetAllInstancesInfo().values()
4261       violations = set()
4262       for group in self.cfg.GetAllNodeGroupsInfo().values():
4263         instances = frozenset([inst for inst in all_instances
4264                                if compat.any(node in group.members
4265                                              for node in inst.all_nodes)])
4266         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268         new = _ComputeNewInstanceViolations(ipol,
4269                                             new_ipolicy, instances)
4270         if new:
4271           violations.update(new)
4272
4273       if violations:
4274         self.LogWarning("After the ipolicy change the following instances"
4275                         " violate them: %s",
4276                         utils.CommaJoin(utils.NiceSort(violations)))
4277
4278     if self.op.nicparams:
4279       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4282       nic_errors = []
4283
4284       # check all instances for consistency
4285       for instance in self.cfg.GetAllInstancesInfo().values():
4286         for nic_idx, nic in enumerate(instance.nics):
4287           params_copy = copy.deepcopy(nic.nicparams)
4288           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4289
4290           # check parameter syntax
4291           try:
4292             objects.NIC.CheckParameterSyntax(params_filled)
4293           except errors.ConfigurationError, err:
4294             nic_errors.append("Instance %s, nic/%d: %s" %
4295                               (instance.name, nic_idx, err))
4296
4297           # if we're moving instances to routed, check that they have an ip
4298           target_mode = params_filled[constants.NIC_MODE]
4299           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301                               " address" % (instance.name, nic_idx))
4302       if nic_errors:
4303         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4305
4306     # hypervisor list/parameters
4307     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308     if self.op.hvparams:
4309       for hv_name, hv_dict in self.op.hvparams.items():
4310         if hv_name not in self.new_hvparams:
4311           self.new_hvparams[hv_name] = hv_dict
4312         else:
4313           self.new_hvparams[hv_name].update(hv_dict)
4314
4315     # disk template parameters
4316     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317     if self.op.diskparams:
4318       for dt_name, dt_params in self.op.diskparams.items():
4319         if dt_name not in self.op.diskparams:
4320           self.new_diskparams[dt_name] = dt_params
4321         else:
4322           self.new_diskparams[dt_name].update(dt_params)
4323
4324     # os hypervisor parameters
4325     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4326     if self.op.os_hvp:
4327       for os_name, hvs in self.op.os_hvp.items():
4328         if os_name not in self.new_os_hvp:
4329           self.new_os_hvp[os_name] = hvs
4330         else:
4331           for hv_name, hv_dict in hvs.items():
4332             if hv_dict is None:
4333               # Delete if it exists
4334               self.new_os_hvp[os_name].pop(hv_name, None)
4335             elif hv_name not in self.new_os_hvp[os_name]:
4336               self.new_os_hvp[os_name][hv_name] = hv_dict
4337             else:
4338               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4339
4340     # os parameters
4341     self.new_osp = objects.FillDict(cluster.osparams, {})
4342     if self.op.osparams:
4343       for os_name, osp in self.op.osparams.items():
4344         if os_name not in self.new_osp:
4345           self.new_osp[os_name] = {}
4346
4347         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4348                                                   use_none=True)
4349
4350         if not self.new_osp[os_name]:
4351           # we removed all parameters
4352           del self.new_osp[os_name]
4353         else:
4354           # check the parameter validity (remote check)
4355           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356                          os_name, self.new_osp[os_name])
4357
4358     # changes to the hypervisor list
4359     if self.op.enabled_hypervisors is not None:
4360       self.hv_list = self.op.enabled_hypervisors
4361       for hv in self.hv_list:
4362         # if the hypervisor doesn't already exist in the cluster
4363         # hvparams, we initialize it to empty, and then (in both
4364         # cases) we make sure to fill the defaults, as we might not
4365         # have a complete defaults list if the hypervisor wasn't
4366         # enabled before
4367         if hv not in new_hvp:
4368           new_hvp[hv] = {}
4369         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4371     else:
4372       self.hv_list = cluster.enabled_hypervisors
4373
4374     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375       # either the enabled list has changed, or the parameters have, validate
4376       for hv_name, hv_params in self.new_hvparams.items():
4377         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378             (self.op.enabled_hypervisors and
4379              hv_name in self.op.enabled_hypervisors)):
4380           # either this is a new hypervisor, or its parameters have changed
4381           hv_class = hypervisor.GetHypervisorClass(hv_name)
4382           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383           hv_class.CheckParameterSyntax(hv_params)
4384           _CheckHVParams(self, node_list, hv_name, hv_params)
4385
4386     if self.op.os_hvp:
4387       # no need to check any newly-enabled hypervisors, since the
4388       # defaults have already been checked in the above code-block
4389       for os_name, os_hvp in self.new_os_hvp.items():
4390         for hv_name, hv_params in os_hvp.items():
4391           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392           # we need to fill in the new os_hvp on top of the actual hv_p
4393           cluster_defaults = self.new_hvparams.get(hv_name, {})
4394           new_osp = objects.FillDict(cluster_defaults, hv_params)
4395           hv_class = hypervisor.GetHypervisorClass(hv_name)
4396           hv_class.CheckParameterSyntax(new_osp)
4397           _CheckHVParams(self, node_list, hv_name, new_osp)
4398
4399     if self.op.default_iallocator:
4400       alloc_script = utils.FindFile(self.op.default_iallocator,
4401                                     constants.IALLOCATOR_SEARCH_PATH,
4402                                     os.path.isfile)
4403       if alloc_script is None:
4404         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405                                    " specified" % self.op.default_iallocator,
4406                                    errors.ECODE_INVAL)
4407
4408   def Exec(self, feedback_fn):
4409     """Change the parameters of the cluster.
4410
4411     """
4412     if self.op.vg_name is not None:
4413       new_volume = self.op.vg_name
4414       if not new_volume:
4415         new_volume = None
4416       if new_volume != self.cfg.GetVGName():
4417         self.cfg.SetVGName(new_volume)
4418       else:
4419         feedback_fn("Cluster LVM configuration already in desired"
4420                     " state, not changing")
4421     if self.op.drbd_helper is not None:
4422       new_helper = self.op.drbd_helper
4423       if not new_helper:
4424         new_helper = None
4425       if new_helper != self.cfg.GetDRBDHelper():
4426         self.cfg.SetDRBDHelper(new_helper)
4427       else:
4428         feedback_fn("Cluster DRBD helper already in desired state,"
4429                     " not changing")
4430     if self.op.hvparams:
4431       self.cluster.hvparams = self.new_hvparams
4432     if self.op.os_hvp:
4433       self.cluster.os_hvp = self.new_os_hvp
4434     if self.op.enabled_hypervisors is not None:
4435       self.cluster.hvparams = self.new_hvparams
4436       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437     if self.op.beparams:
4438       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439     if self.op.nicparams:
4440       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4441     if self.op.ipolicy:
4442       self.cluster.ipolicy = self.new_ipolicy
4443     if self.op.osparams:
4444       self.cluster.osparams = self.new_osp
4445     if self.op.ndparams:
4446       self.cluster.ndparams = self.new_ndparams
4447     if self.op.diskparams:
4448       self.cluster.diskparams = self.new_diskparams
4449     if self.op.hv_state:
4450       self.cluster.hv_state_static = self.new_hv_state
4451     if self.op.disk_state:
4452       self.cluster.disk_state_static = self.new_disk_state
4453
4454     if self.op.candidate_pool_size is not None:
4455       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456       # we need to update the pool size here, otherwise the save will fail
4457       _AdjustCandidatePool(self, [])
4458
4459     if self.op.maintain_node_health is not None:
4460       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461         feedback_fn("Note: CONFD was disabled at build time, node health"
4462                     " maintenance is not useful (still enabling it)")
4463       self.cluster.maintain_node_health = self.op.maintain_node_health
4464
4465     if self.op.prealloc_wipe_disks is not None:
4466       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4467
4468     if self.op.add_uids is not None:
4469       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4470
4471     if self.op.remove_uids is not None:
4472       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4473
4474     if self.op.uid_pool is not None:
4475       self.cluster.uid_pool = self.op.uid_pool
4476
4477     if self.op.default_iallocator is not None:
4478       self.cluster.default_iallocator = self.op.default_iallocator
4479
4480     if self.op.reserved_lvs is not None:
4481       self.cluster.reserved_lvs = self.op.reserved_lvs
4482
4483     if self.op.use_external_mip_script is not None:
4484       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4485
4486     def helper_os(aname, mods, desc):
4487       desc += " OS list"
4488       lst = getattr(self.cluster, aname)
4489       for key, val in mods:
4490         if key == constants.DDM_ADD:
4491           if val in lst:
4492             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4493           else:
4494             lst.append(val)
4495         elif key == constants.DDM_REMOVE:
4496           if val in lst:
4497             lst.remove(val)
4498           else:
4499             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4500         else:
4501           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4502
4503     if self.op.hidden_os:
4504       helper_os("hidden_os", self.op.hidden_os, "hidden")
4505
4506     if self.op.blacklisted_os:
4507       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4508
4509     if self.op.master_netdev:
4510       master_params = self.cfg.GetMasterNetworkParameters()
4511       ems = self.cfg.GetUseExternalMipScript()
4512       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513                   self.cluster.master_netdev)
4514       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4515                                                        master_params, ems)
4516       result.Raise("Could not disable the master ip")
4517       feedback_fn("Changing master_netdev from %s to %s" %
4518                   (master_params.netdev, self.op.master_netdev))
4519       self.cluster.master_netdev = self.op.master_netdev
4520
4521     if self.op.master_netmask:
4522       master_params = self.cfg.GetMasterNetworkParameters()
4523       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524       result = self.rpc.call_node_change_master_netmask(master_params.name,
4525                                                         master_params.netmask,
4526                                                         self.op.master_netmask,
4527                                                         master_params.ip,
4528                                                         master_params.netdev)
4529       if result.fail_msg:
4530         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4531         feedback_fn(msg)
4532
4533       self.cluster.master_netmask = self.op.master_netmask
4534
4535     self.cfg.Update(self.cluster, feedback_fn)
4536
4537     if self.op.master_netdev:
4538       master_params = self.cfg.GetMasterNetworkParameters()
4539       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540                   self.op.master_netdev)
4541       ems = self.cfg.GetUseExternalMipScript()
4542       result = self.rpc.call_node_activate_master_ip(master_params.name,
4543                                                      master_params, ems)
4544       if result.fail_msg:
4545         self.LogWarning("Could not re-enable the master ip on"
4546                         " the master, please restart manually: %s",
4547                         result.fail_msg)
4548
4549
4550 def _UploadHelper(lu, nodes, fname):
4551   """Helper for uploading a file and showing warnings.
4552
4553   """
4554   if os.path.exists(fname):
4555     result = lu.rpc.call_upload_file(nodes, fname)
4556     for to_node, to_result in result.items():
4557       msg = to_result.fail_msg
4558       if msg:
4559         msg = ("Copy of file %s to node %s failed: %s" %
4560                (fname, to_node, msg))
4561         lu.LogWarning(msg)
4562
4563
4564 def _ComputeAncillaryFiles(cluster, redist):
4565   """Compute files external to Ganeti which need to be consistent.
4566
4567   @type redist: boolean
4568   @param redist: Whether to include files which need to be redistributed
4569
4570   """
4571   # Compute files for all nodes
4572   files_all = set([
4573     pathutils.SSH_KNOWN_HOSTS_FILE,
4574     pathutils.CONFD_HMAC_KEY,
4575     pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576     pathutils.SPICE_CERT_FILE,
4577     pathutils.SPICE_CACERT_FILE,
4578     pathutils.RAPI_USERS_FILE,
4579     ])
4580
4581   if redist:
4582     # we need to ship at least the RAPI certificate
4583     files_all.add(pathutils.RAPI_CERT_FILE)
4584   else:
4585     files_all.update(pathutils.ALL_CERT_FILES)
4586     files_all.update(ssconf.SimpleStore().GetFileList())
4587
4588   if cluster.modify_etc_hosts:
4589     files_all.add(pathutils.ETC_HOSTS)
4590
4591   if cluster.use_external_mip_script:
4592     files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4593
4594   # Files which are optional, these must:
4595   # - be present in one other category as well
4596   # - either exist or not exist on all nodes of that category (mc, vm all)
4597   files_opt = set([
4598     pathutils.RAPI_USERS_FILE,
4599     ])
4600
4601   # Files which should only be on master candidates
4602   files_mc = set()
4603
4604   if not redist:
4605     files_mc.add(pathutils.CLUSTER_CONF_FILE)
4606
4607   # File storage
4608   if (not redist and
4609       (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610     files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611     files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4612
4613   # Files which should only be on VM-capable nodes
4614   files_vm = set(
4615     filename
4616     for hv_name in cluster.enabled_hypervisors
4617     for filename in
4618       hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4619
4620   files_opt |= set(
4621     filename
4622     for hv_name in cluster.enabled_hypervisors
4623     for filename in
4624       hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4625
4626   # Filenames in each category must be unique
4627   all_files_set = files_all | files_mc | files_vm
4628   assert (len(all_files_set) ==
4629           sum(map(len, [files_all, files_mc, files_vm]))), \
4630          "Found file listed in more than one file list"
4631
4632   # Optional files must be present in one other category
4633   assert all_files_set.issuperset(files_opt), \
4634          "Optional file not in a different required list"
4635
4636   # This one file should never ever be re-distributed via RPC
4637   assert not (redist and
4638               pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4639
4640   return (files_all, files_opt, files_mc, files_vm)
4641
4642
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644   """Distribute additional files which are part of the cluster configuration.
4645
4646   ConfigWriter takes care of distributing the config and ssconf files, but
4647   there are more files which should be distributed to all nodes. This function
4648   makes sure those are copied.
4649
4650   @param lu: calling logical unit
4651   @param additional_nodes: list of nodes not in the config to distribute to
4652   @type additional_vm: boolean
4653   @param additional_vm: whether the additional nodes are vm-capable or not
4654
4655   """
4656   # Gather target nodes
4657   cluster = lu.cfg.GetClusterInfo()
4658   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4659
4660   online_nodes = lu.cfg.GetOnlineNodeList()
4661   online_set = frozenset(online_nodes)
4662   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4663
4664   if additional_nodes is not None:
4665     online_nodes.extend(additional_nodes)
4666     if additional_vm:
4667       vm_nodes.extend(additional_nodes)
4668
4669   # Never distribute to master node
4670   for nodelist in [online_nodes, vm_nodes]:
4671     if master_info.name in nodelist:
4672       nodelist.remove(master_info.name)
4673
4674   # Gather file lists
4675   (files_all, _, files_mc, files_vm) = \
4676     _ComputeAncillaryFiles(cluster, True)
4677
4678   # Never re-distribute configuration file from here
4679   assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680               pathutils.CLUSTER_CONF_FILE in files_vm)
4681   assert not files_mc, "Master candidates not handled in this function"
4682
4683   filemap = [
4684     (online_nodes, files_all),
4685     (vm_nodes, files_vm),
4686     ]
4687
4688   # Upload the files
4689   for (node_list, files) in filemap:
4690     for fname in files:
4691       _UploadHelper(lu, node_list, fname)
4692
4693
4694 class LUClusterRedistConf(NoHooksLU):
4695   """Force the redistribution of cluster configuration.
4696
4697   This is a very simple LU.
4698
4699   """
4700   REQ_BGL = False
4701
4702   def ExpandNames(self):
4703     self.needed_locks = {
4704       locking.LEVEL_NODE: locking.ALL_SET,
4705       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4706     }
4707     self.share_locks = _ShareAll()
4708
4709   def Exec(self, feedback_fn):
4710     """Redistribute the configuration.
4711
4712     """
4713     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714     _RedistributeAncillaryFiles(self)
4715
4716
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718   """Activate the master IP on the master node.
4719
4720   """
4721   def Exec(self, feedback_fn):
4722     """Activate the master IP.
4723
4724     """
4725     master_params = self.cfg.GetMasterNetworkParameters()
4726     ems = self.cfg.GetUseExternalMipScript()
4727     result = self.rpc.call_node_activate_master_ip(master_params.name,
4728                                                    master_params, ems)
4729     result.Raise("Could not activate the master IP")
4730
4731
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733   """Deactivate the master IP on the master node.
4734
4735   """
4736   def Exec(self, feedback_fn):
4737     """Deactivate the master IP.
4738
4739     """
4740     master_params = self.cfg.GetMasterNetworkParameters()
4741     ems = self.cfg.GetUseExternalMipScript()
4742     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4743                                                      master_params, ems)
4744     result.Raise("Could not deactivate the master IP")
4745
4746
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748   """Sleep and poll for an instance's disk to sync.
4749
4750   """
4751   if not instance.disks or disks is not None and not disks:
4752     return True
4753
4754   disks = _ExpandCheckDisks(instance, disks)
4755
4756   if not oneshot:
4757     lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4758
4759   node = instance.primary_node
4760
4761   for dev in disks:
4762     lu.cfg.SetDiskID(dev, node)
4763
4764   # TODO: Convert to utils.Retry
4765
4766   retries = 0
4767   degr_retries = 10 # in seconds, as we sleep 1 second each time
4768   while True:
4769     max_time = 0
4770     done = True
4771     cumul_degraded = False
4772     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773     msg = rstats.fail_msg
4774     if msg:
4775       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4776       retries += 1
4777       if retries >= 10:
4778         raise errors.RemoteError("Can't contact node %s for mirror data,"
4779                                  " aborting." % node)
4780       time.sleep(6)
4781       continue
4782     rstats = rstats.payload
4783     retries = 0
4784     for i, mstat in enumerate(rstats):
4785       if mstat is None:
4786         lu.LogWarning("Can't compute data for node %s/%s",
4787                            node, disks[i].iv_name)
4788         continue
4789
4790       cumul_degraded = (cumul_degraded or
4791                         (mstat.is_degraded and mstat.sync_percent is None))
4792       if mstat.sync_percent is not None:
4793         done = False
4794         if mstat.estimated_time is not None:
4795           rem_time = ("%s remaining (estimated)" %
4796                       utils.FormatSeconds(mstat.estimated_time))
4797           max_time = mstat.estimated_time
4798         else:
4799           rem_time = "no time estimate"
4800         lu.LogInfo("- device %s: %5.2f%% done, %s",
4801                    disks[i].iv_name, mstat.sync_percent, rem_time)
4802
4803     # if we're done but degraded, let's do a few small retries, to
4804     # make sure we see a stable and not transient situation; therefore
4805     # we force restart of the loop
4806     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807       logging.info("Degraded disks found, %d retries left", degr_retries)
4808       degr_retries -= 1
4809       time.sleep(1)
4810       continue
4811
4812     if done or oneshot:
4813       break
4814
4815     time.sleep(min(60, max_time))
4816
4817   if done:
4818     lu.LogInfo("Instance %s's disks are in sync", instance.name)
4819
4820   return not cumul_degraded
4821
4822
4823 def _BlockdevFind(lu, node, dev, instance):
4824   """Wrapper around call_blockdev_find to annotate diskparams.
4825
4826   @param lu: A reference to the lu object
4827   @param node: The node to call out
4828   @param dev: The device to find
4829   @param instance: The instance object the device belongs to
4830   @returns The result of the rpc call
4831
4832   """
4833   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834   return lu.rpc.call_blockdev_find(node, disk)
4835
4836
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838   """Wrapper around L{_CheckDiskConsistencyInner}.
4839
4840   """
4841   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4843                                     ldisk=ldisk)
4844
4845
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4847                                ldisk=False):
4848   """Check that mirrors are not degraded.
4849
4850   @attention: The device has to be annotated already.
4851
4852   The ldisk parameter, if True, will change the test from the
4853   is_degraded attribute (which represents overall non-ok status for
4854   the device(s)) to the ldisk (representing the local storage status).
4855
4856   """
4857   lu.cfg.SetDiskID(dev, node)
4858
4859   result = True
4860
4861   if on_primary or dev.AssembleOnSecondary():
4862     rstats = lu.rpc.call_blockdev_find(node, dev)
4863     msg = rstats.fail_msg
4864     if msg:
4865       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4866       result = False
4867     elif not rstats.payload:
4868       lu.LogWarning("Can't find disk on node %s", node)
4869       result = False
4870     else:
4871       if ldisk:
4872         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4873       else:
4874         result = result and not rstats.payload.is_degraded
4875
4876   if dev.children:
4877     for child in dev.children:
4878       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4879                                                      on_primary)
4880
4881   return result
4882
4883
4884 class LUOobCommand(NoHooksLU):
4885   """Logical unit for OOB handling.
4886
4887   """
4888   REQ_BGL = False
4889   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4890
4891   def ExpandNames(self):
4892     """Gather locks we need.
4893
4894     """
4895     if self.op.node_names:
4896       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897       lock_names = self.op.node_names
4898     else:
4899       lock_names = locking.ALL_SET
4900
4901     self.needed_locks = {
4902       locking.LEVEL_NODE: lock_names,
4903       }
4904
4905     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4906
4907     if not self.op.node_names:
4908       # Acquire node allocation lock only if all nodes are affected
4909       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4910
4911   def CheckPrereq(self):
4912     """Check prerequisites.
4913
4914     This checks:
4915      - the node exists in the configuration
4916      - OOB is supported
4917
4918     Any errors are signaled by raising errors.OpPrereqError.
4919
4920     """
4921     self.nodes = []
4922     self.master_node = self.cfg.GetMasterNode()
4923
4924     assert self.op.power_delay >= 0.0
4925
4926     if self.op.node_names:
4927       if (self.op.command in self._SKIP_MASTER and
4928           self.master_node in self.op.node_names):
4929         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4931
4932         if master_oob_handler:
4933           additional_text = ("run '%s %s %s' if you want to operate on the"
4934                              " master regardless") % (master_oob_handler,
4935                                                       self.op.command,
4936                                                       self.master_node)
4937         else:
4938           additional_text = "it does not support out-of-band operations"
4939
4940         raise errors.OpPrereqError(("Operating on the master node %s is not"
4941                                     " allowed for %s; %s") %
4942                                    (self.master_node, self.op.command,
4943                                     additional_text), errors.ECODE_INVAL)
4944     else:
4945       self.op.node_names = self.cfg.GetNodeList()
4946       if self.op.command in self._SKIP_MASTER:
4947         self.op.node_names.remove(self.master_node)
4948
4949     if self.op.command in self._SKIP_MASTER:
4950       assert self.master_node not in self.op.node_names
4951
4952     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4953       if node is None:
4954         raise errors.OpPrereqError("Node %s not found" % node_name,
4955                                    errors.ECODE_NOENT)
4956       else:
4957         self.nodes.append(node)
4958
4959       if (not self.op.ignore_status and
4960           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962                                     " not marked offline") % node_name,
4963                                    errors.ECODE_STATE)
4964
4965   def Exec(self, feedback_fn):
4966     """Execute OOB and return result if we expect any.
4967
4968     """
4969     master_node = self.master_node
4970     ret = []
4971
4972     for idx, node in enumerate(utils.NiceSort(self.nodes,
4973                                               key=lambda node: node.name)):
4974       node_entry = [(constants.RS_NORMAL, node.name)]
4975       ret.append(node_entry)
4976
4977       oob_program = _SupportsOob(self.cfg, node)
4978
4979       if not oob_program:
4980         node_entry.append((constants.RS_UNAVAIL, None))
4981         continue
4982
4983       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984                    self.op.command, oob_program, node.name)
4985       result = self.rpc.call_run_oob(master_node, oob_program,
4986                                      self.op.command, node.name,
4987                                      self.op.timeout)
4988
4989       if result.fail_msg:
4990         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991                         node.name, result.fail_msg)
4992         node_entry.append((constants.RS_NODATA, None))
4993       else:
4994         try:
4995           self._CheckPayload(result)
4996         except errors.OpExecError, err:
4997           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4998                           node.name, err)
4999           node_entry.append((constants.RS_NODATA, None))
5000         else:
5001           if self.op.command == constants.OOB_HEALTH:
5002             # For health we should log important events
5003             for item, status in result.payload:
5004               if status in [constants.OOB_STATUS_WARNING,
5005                             constants.OOB_STATUS_CRITICAL]:
5006                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007                                 item, node.name, status)
5008
5009           if self.op.command == constants.OOB_POWER_ON:
5010             node.powered = True
5011           elif self.op.command == constants.OOB_POWER_OFF:
5012             node.powered = False
5013           elif self.op.command == constants.OOB_POWER_STATUS:
5014             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015             if powered != node.powered:
5016               logging.warning(("Recorded power state (%s) of node '%s' does not"
5017                                " match actual power state (%s)"), node.powered,
5018                               node.name, powered)
5019
5020           # For configuration changing commands we should update the node
5021           if self.op.command in (constants.OOB_POWER_ON,
5022                                  constants.OOB_POWER_OFF):
5023             self.cfg.Update(node, feedback_fn)
5024
5025           node_entry.append((constants.RS_NORMAL, result.payload))
5026
5027           if (self.op.command == constants.OOB_POWER_ON and
5028               idx < len(self.nodes) - 1):
5029             time.sleep(self.op.power_delay)
5030
5031     return ret
5032
5033   def _CheckPayload(self, result):
5034     """Checks if the payload is valid.
5035
5036     @param result: RPC result
5037     @raises errors.OpExecError: If payload is not valid
5038
5039     """
5040     errs = []
5041     if self.op.command == constants.OOB_HEALTH:
5042       if not isinstance(result.payload, list):
5043         errs.append("command 'health' is expected to return a list but got %s" %
5044                     type(result.payload))
5045       else:
5046         for item, status in result.payload:
5047           if status not in constants.OOB_STATUSES:
5048             errs.append("health item '%s' has invalid status '%s'" %
5049                         (item, status))
5050
5051     if self.op.command == constants.OOB_POWER_STATUS:
5052       if not isinstance(result.payload, dict):
5053         errs.append("power-status is expected to return a dict but got %s" %
5054                     type(result.payload))
5055
5056     if self.op.command in [
5057       constants.OOB_POWER_ON,
5058       constants.OOB_POWER_OFF,
5059       constants.OOB_POWER_CYCLE,
5060       ]:
5061       if result.payload is not None:
5062         errs.append("%s is expected to not return payload but got '%s'" %
5063                     (self.op.command, result.payload))
5064
5065     if errs:
5066       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067                                utils.CommaJoin(errs))
5068
5069
5070 class _OsQuery(_QueryBase):
5071   FIELDS = query.OS_FIELDS
5072
5073   def ExpandNames(self, lu):
5074     # Lock all nodes in shared mode
5075     # Temporary removal of locks, should be reverted later
5076     # TODO: reintroduce locks when they are lighter-weight
5077     lu.needed_locks = {}
5078     #self.share_locks[locking.LEVEL_NODE] = 1
5079     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5080
5081     # The following variables interact with _QueryBase._GetNames
5082     if self.names:
5083       self.wanted = self.names
5084     else:
5085       self.wanted = locking.ALL_SET
5086
5087     self.do_locking = self.use_locking
5088
5089   def DeclareLocks(self, lu, level):
5090     pass
5091
5092   @staticmethod
5093   def _DiagnoseByOS(rlist):
5094     """Remaps a per-node return list into an a per-os per-node dictionary
5095
5096     @param rlist: a map with node names as keys and OS objects as values
5097
5098     @rtype: dict
5099     @return: a dictionary with osnames as keys and as value another
5100         map, with nodes as keys and tuples of (path, status, diagnose,
5101         variants, parameters, api_versions) as values, eg::
5102
5103           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104                                      (/srv/..., False, "invalid api")],
5105                            "node2": [(/srv/..., True, "", [], [])]}
5106           }
5107
5108     """
5109     all_os = {}
5110     # we build here the list of nodes that didn't fail the RPC (at RPC
5111     # level), so that nodes with a non-responding node daemon don't
5112     # make all OSes invalid
5113     good_nodes = [node_name for node_name in rlist
5114                   if not rlist[node_name].fail_msg]
5115     for node_name, nr in rlist.items():
5116       if nr.fail_msg or not nr.payload:
5117         continue
5118       for (name, path, status, diagnose, variants,
5119            params, api_versions) in nr.payload:
5120         if name not in all_os:
5121           # build a list of nodes for this os containing empty lists
5122           # for each node in node_list
5123           all_os[name] = {}
5124           for nname in good_nodes:
5125             all_os[name][nname] = []
5126         # convert params from [name, help] to (name, help)
5127         params = [tuple(v) for v in params]
5128         all_os[name][node_name].append((path, status, diagnose,
5129                                         variants, params, api_versions))
5130     return all_os
5131
5132   def _GetQueryData(self, lu):
5133     """Computes the list of nodes and their attributes.
5134
5135     """
5136     # Locking is not used
5137     assert not (compat.any(lu.glm.is_owned(level)
5138                            for level in locking.LEVELS
5139                            if level != locking.LEVEL_CLUSTER) or
5140                 self.do_locking or self.use_locking)
5141
5142     valid_nodes = [node.name
5143                    for node in lu.cfg.GetAllNodesInfo().values()
5144                    if not node.offline and node.vm_capable]
5145     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146     cluster = lu.cfg.GetClusterInfo()
5147
5148     data = {}
5149
5150     for (os_name, os_data) in pol.items():
5151       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152                           hidden=(os_name in cluster.hidden_os),
5153                           blacklisted=(os_name in cluster.blacklisted_os))
5154
5155       variants = set()
5156       parameters = set()
5157       api_versions = set()
5158
5159       for idx, osl in enumerate(os_data.values()):
5160         info.valid = bool(info.valid and osl and osl[0][1])
5161         if not info.valid:
5162           break
5163
5164         (node_variants, node_params, node_api) = osl[0][3:6]
5165         if idx == 0:
5166           # First entry
5167           variants.update(node_variants)
5168           parameters.update(node_params)
5169           api_versions.update(node_api)
5170         else:
5171           # Filter out inconsistent values
5172           variants.intersection_update(node_variants)
5173           parameters.intersection_update(node_params)
5174           api_versions.intersection_update(node_api)
5175
5176       info.variants = list(variants)
5177       info.parameters = list(parameters)
5178       info.api_versions = list(api_versions)
5179
5180       data[os_name] = info
5181
5182     # Prepare data in requested order
5183     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5184             if name in data]
5185
5186
5187 class LUOsDiagnose(NoHooksLU):
5188   """Logical unit for OS diagnose/query.
5189
5190   """
5191   REQ_BGL = False
5192
5193   @staticmethod
5194   def _BuildFilter(fields, names):
5195     """Builds a filter for querying OSes.
5196
5197     """
5198     name_filter = qlang.MakeSimpleFilter("name", names)
5199
5200     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201     # respective field is not requested
5202     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203                      for fname in ["hidden", "blacklisted"]
5204                      if fname not in fields]
5205     if "valid" not in fields:
5206       status_filter.append([qlang.OP_TRUE, "valid"])
5207
5208     if status_filter:
5209       status_filter.insert(0, qlang.OP_AND)
5210     else:
5211       status_filter = None
5212
5213     if name_filter and status_filter:
5214       return [qlang.OP_AND, name_filter, status_filter]
5215     elif name_filter:
5216       return name_filter
5217     else:
5218       return status_filter
5219
5220   def CheckArguments(self):
5221     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222                        self.op.output_fields, False)
5223
5224   def ExpandNames(self):
5225     self.oq.ExpandNames(self)
5226
5227   def Exec(self, feedback_fn):
5228     return self.oq.OldStyleQuery(self)
5229
5230
5231 class _ExtStorageQuery(_QueryBase):
5232   FIELDS = query.EXTSTORAGE_FIELDS
5233
5234   def ExpandNames(self, lu):
5235     # Lock all nodes in shared mode
5236     # Temporary removal of locks, should be reverted later
5237     # TODO: reintroduce locks when they are lighter-weight
5238     lu.needed_locks = {}
5239     #self.share_locks[locking.LEVEL_NODE] = 1
5240     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5241
5242     # The following variables interact with _QueryBase._GetNames
5243     if self.names:
5244       self.wanted = self.names
5245     else:
5246       self.wanted = locking.ALL_SET
5247
5248     self.do_locking = self.use_locking
5249
5250   def DeclareLocks(self, lu, level):
5251     pass
5252
5253   @staticmethod
5254   def _DiagnoseByProvider(rlist):
5255     """Remaps a per-node return list into an a per-provider per-node dictionary
5256
5257     @param rlist: a map with node names as keys and ExtStorage objects as values
5258
5259     @rtype: dict
5260     @return: a dictionary with extstorage providers as keys and as
5261         value another map, with nodes as keys and tuples of
5262         (path, status, diagnose, parameters) as values, eg::
5263
5264           {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5265                          "node2": [(/srv/..., False, "missing file")]
5266                          "node3": [(/srv/..., True, "", [])]
5267           }
5268
5269     """
5270     all_es = {}
5271     # we build here the list of nodes that didn't fail the RPC (at RPC
5272     # level), so that nodes with a non-responding node daemon don't
5273     # make all OSes invalid
5274     good_nodes = [node_name for node_name in rlist
5275                   if not rlist[node_name].fail_msg]
5276     for node_name, nr in rlist.items():
5277       if nr.fail_msg or not nr.payload:
5278         continue
5279       for (name, path, status, diagnose, params) in nr.payload:
5280         if name not in all_es:
5281           # build a list of nodes for this os containing empty lists
5282           # for each node in node_list
5283           all_es[name] = {}
5284           for nname in good_nodes:
5285             all_es[name][nname] = []
5286         # convert params from [name, help] to (name, help)
5287         params = [tuple(v) for v in params]
5288         all_es[name][node_name].append((path, status, diagnose, params))
5289     return all_es
5290
5291   def _GetQueryData(self, lu):
5292     """Computes the list of nodes and their attributes.
5293
5294     """
5295     # Locking is not used
5296     assert not (compat.any(lu.glm.is_owned(level)
5297                            for level in locking.LEVELS
5298                            if level != locking.LEVEL_CLUSTER) or
5299                 self.do_locking or self.use_locking)
5300
5301     valid_nodes = [node.name
5302                    for node in lu.cfg.GetAllNodesInfo().values()
5303                    if not node.offline and node.vm_capable]
5304     pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5305
5306     data = {}
5307
5308     nodegroup_list = lu.cfg.GetNodeGroupList()
5309
5310     for (es_name, es_data) in pol.items():
5311       # For every provider compute the nodegroup validity.
5312       # To do this we need to check the validity of each node in es_data
5313       # and then construct the corresponding nodegroup dict:
5314       #      { nodegroup1: status
5315       #        nodegroup2: status
5316       #      }
5317       ndgrp_data = {}
5318       for nodegroup in nodegroup_list:
5319         ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5320
5321         nodegroup_nodes = ndgrp.members
5322         nodegroup_name = ndgrp.name
5323         node_statuses = []
5324
5325         for node in nodegroup_nodes:
5326           if node in valid_nodes:
5327             if es_data[node] != []:
5328               node_status = es_data[node][0][1]
5329               node_statuses.append(node_status)
5330             else:
5331               node_statuses.append(False)
5332
5333         if False in node_statuses:
5334           ndgrp_data[nodegroup_name] = False
5335         else:
5336           ndgrp_data[nodegroup_name] = True
5337
5338       # Compute the provider's parameters
5339       parameters = set()
5340       for idx, esl in enumerate(es_data.values()):
5341         valid = bool(esl and esl[0][1])
5342         if not valid:
5343           break
5344
5345         node_params = esl[0][3]
5346         if idx == 0:
5347           # First entry
5348           parameters.update(node_params)
5349         else:
5350           # Filter out inconsistent values
5351           parameters.intersection_update(node_params)
5352
5353       params = list(parameters)
5354
5355       # Now fill all the info for this provider
5356       info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5357                                   nodegroup_status=ndgrp_data,
5358                                   parameters=params)
5359
5360       data[es_name] = info
5361
5362     # Prepare data in requested order
5363     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5364             if name in data]
5365
5366
5367 class LUExtStorageDiagnose(NoHooksLU):
5368   """Logical unit for ExtStorage diagnose/query.
5369
5370   """
5371   REQ_BGL = False
5372
5373   def CheckArguments(self):
5374     self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5375                                self.op.output_fields, False)
5376
5377   def ExpandNames(self):
5378     self.eq.ExpandNames(self)
5379
5380   def Exec(self, feedback_fn):
5381     return self.eq.OldStyleQuery(self)
5382
5383
5384 class LUNodeRemove(LogicalUnit):
5385   """Logical unit for removing a node.
5386
5387   """
5388   HPATH = "node-remove"
5389   HTYPE = constants.HTYPE_NODE
5390
5391   def BuildHooksEnv(self):
5392     """Build hooks env.
5393
5394     """
5395     return {
5396       "OP_TARGET": self.op.node_name,
5397       "NODE_NAME": self.op.node_name,
5398       }
5399
5400   def BuildHooksNodes(self):
5401     """Build hooks nodes.
5402
5403     This doesn't run on the target node in the pre phase as a failed
5404     node would then be impossible to remove.
5405
5406     """
5407     all_nodes = self.cfg.GetNodeList()
5408     try:
5409       all_nodes.remove(self.op.node_name)
5410     except ValueError:
5411       pass
5412     return (all_nodes, all_nodes)
5413
5414   def CheckPrereq(self):
5415     """Check prerequisites.
5416
5417     This checks:
5418      - the node exists in the configuration
5419      - it does not have primary or secondary instances
5420      - it's not the master
5421
5422     Any errors are signaled by raising errors.OpPrereqError.
5423
5424     """
5425     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5426     node = self.cfg.GetNodeInfo(self.op.node_name)
5427     assert node is not None
5428
5429     masternode = self.cfg.GetMasterNode()
5430     if node.name == masternode:
5431       raise errors.OpPrereqError("Node is the master node, failover to another"
5432                                  " node is required", errors.ECODE_INVAL)
5433
5434     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5435       if node.name in instance.all_nodes:
5436         raise errors.OpPrereqError("Instance %s is still running on the node,"
5437                                    " please remove first" % instance_name,
5438                                    errors.ECODE_INVAL)
5439     self.op.node_name = node.name
5440     self.node = node
5441
5442   def Exec(self, feedback_fn):
5443     """Removes the node from the cluster.
5444
5445     """
5446     node = self.node
5447     logging.info("Stopping the node daemon and removing configs from node %s",
5448                  node.name)
5449
5450     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5451
5452     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5453       "Not owning BGL"
5454
5455     # Promote nodes to master candidate as needed
5456     _AdjustCandidatePool(self, exceptions=[node.name])
5457     self.context.RemoveNode(node.name)
5458
5459     # Run post hooks on the node before it's removed
5460     _RunPostHook(self, node.name)
5461
5462     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5463     msg = result.fail_msg
5464     if msg:
5465       self.LogWarning("Errors encountered on the remote node while leaving"
5466                       " the cluster: %s", msg)
5467
5468     # Remove node from our /etc/hosts
5469     if self.cfg.GetClusterInfo().modify_etc_hosts:
5470       master_node = self.cfg.GetMasterNode()
5471       result = self.rpc.call_etc_hosts_modify(master_node,
5472                                               constants.ETC_HOSTS_REMOVE,
5473                                               node.name, None)
5474       result.Raise("Can't update hosts file with new host data")
5475       _RedistributeAncillaryFiles(self)
5476
5477
5478 class _NodeQuery(_QueryBase):
5479   FIELDS = query.NODE_FIELDS
5480
5481   def ExpandNames(self, lu):
5482     lu.needed_locks = {}
5483     lu.share_locks = _ShareAll()
5484
5485     if self.names:
5486       self.wanted = _GetWantedNodes(lu, self.names)
5487     else:
5488       self.wanted = locking.ALL_SET
5489
5490     self.do_locking = (self.use_locking and
5491                        query.NQ_LIVE in self.requested_data)
5492
5493     if self.do_locking:
5494       # If any non-static field is requested we need to lock the nodes
5495       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5496       lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5497
5498   def DeclareLocks(self, lu, level):
5499     pass
5500
5501   def _GetQueryData(self, lu):
5502     """Computes the list of nodes and their attributes.
5503
5504     """
5505     all_info = lu.cfg.GetAllNodesInfo()
5506
5507     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5508
5509     # Gather data as requested
5510     if query.NQ_LIVE in self.requested_data:
5511       # filter out non-vm_capable nodes
5512       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5513
5514       es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5515       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5516                                         [lu.cfg.GetHypervisorType()], es_flags)
5517       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5518                        for (name, nresult) in node_data.items()
5519                        if not nresult.fail_msg and nresult.payload)
5520     else:
5521       live_data = None
5522
5523     if query.NQ_INST in self.requested_data:
5524       node_to_primary = dict([(name, set()) for name in nodenames])
5525       node_to_secondary = dict([(name, set()) for name in nodenames])
5526
5527       inst_data = lu.cfg.GetAllInstancesInfo()
5528
5529       for inst in inst_data.values():
5530         if inst.primary_node in node_to_primary:
5531           node_to_primary[inst.primary_node].add(inst.name)
5532         for secnode in inst.secondary_nodes:
5533           if secnode in node_to_secondary:
5534             node_to_secondary[secnode].add(inst.name)
5535     else:
5536       node_to_primary = None
5537       node_to_secondary = None
5538
5539     if query.NQ_OOB in self.requested_data:
5540       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5541                          for name, node in all_info.iteritems())
5542     else:
5543       oob_support = None
5544
5545     if query.NQ_GROUP in self.requested_data:
5546       groups = lu.cfg.GetAllNodeGroupsInfo()
5547     else:
5548       groups = {}
5549
5550     return query.NodeQueryData([all_info[name] for name in nodenames],
5551                                live_data, lu.cfg.GetMasterNode(),
5552                                node_to_primary, node_to_secondary, groups,
5553                                oob_support, lu.cfg.GetClusterInfo())
5554
5555
5556 class LUNodeQuery(NoHooksLU):
5557   """Logical unit for querying nodes.
5558
5559   """
5560   # pylint: disable=W0142
5561   REQ_BGL = False
5562
5563   def CheckArguments(self):
5564     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5565                          self.op.output_fields, self.op.use_locking)
5566
5567   def ExpandNames(self):
5568     self.nq.ExpandNames(self)
5569
5570   def DeclareLocks(self, level):
5571     self.nq.DeclareLocks(self, level)
5572
5573   def Exec(self, feedback_fn):
5574     return self.nq.OldStyleQuery(self)
5575
5576
5577 class LUNodeQueryvols(NoHooksLU):
5578   """Logical unit for getting volumes on node(s).
5579
5580   """
5581   REQ_BGL = False
5582   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5583   _FIELDS_STATIC = utils.FieldSet("node")
5584
5585   def CheckArguments(self):
5586     _CheckOutputFields(static=self._FIELDS_STATIC,
5587                        dynamic=self._FIELDS_DYNAMIC,
5588                        selected=self.op.output_fields)
5589
5590   def ExpandNames(self):
5591     self.share_locks = _ShareAll()
5592
5593     if self.op.nodes:
5594       self.needed_locks = {
5595         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5596         }
5597     else:
5598       self.needed_locks = {
5599         locking.LEVEL_NODE: locking.ALL_SET,
5600         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5601         }
5602
5603   def Exec(self, feedback_fn):
5604     """Computes the list of nodes and their attributes.
5605
5606     """
5607     nodenames = self.owned_locks(locking.LEVEL_NODE)
5608     volumes = self.rpc.call_node_volumes(nodenames)
5609
5610     ilist = self.cfg.GetAllInstancesInfo()
5611     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5612
5613     output = []
5614     for node in nodenames:
5615       nresult = volumes[node]
5616       if nresult.offline:
5617         continue
5618       msg = nresult.fail_msg
5619       if msg:
5620         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5621         continue
5622
5623       node_vols = sorted(nresult.payload,
5624                          key=operator.itemgetter("dev"))
5625
5626       for vol in node_vols:
5627         node_output = []
5628         for field in self.op.output_fields:
5629           if field == "node":
5630             val = node
5631           elif field == "phys":
5632             val = vol["dev"]
5633           elif field == "vg":
5634             val = vol["vg"]
5635           elif field == "name":
5636             val = vol["name"]
5637           elif field == "size":
5638             val = int(float(vol["size"]))
5639           elif field == "instance":
5640             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5641           else:
5642             raise errors.ParameterError(field)
5643           node_output.append(str(val))
5644
5645         output.append(node_output)
5646
5647     return output
5648
5649
5650 class LUNodeQueryStorage(NoHooksLU):
5651   """Logical unit for getting information on storage units on node(s).
5652
5653   """
5654   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5655   REQ_BGL = False
5656
5657   def CheckArguments(self):
5658     _CheckOutputFields(static=self._FIELDS_STATIC,
5659                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5660                        selected=self.op.output_fields)
5661
5662   def ExpandNames(self):
5663     self.share_locks = _ShareAll()
5664
5665     if self.op.nodes:
5666       self.needed_locks = {
5667         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5668         }
5669     else:
5670       self.needed_locks = {
5671         locking.LEVEL_NODE: locking.ALL_SET,
5672         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5673         }
5674
5675   def Exec(self, feedback_fn):
5676     """Computes the list of nodes and their attributes.
5677
5678     """
5679     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5680
5681     # Always get name to sort by
5682     if constants.SF_NAME in self.op.output_fields:
5683       fields = self.op.output_fields[:]
5684     else:
5685       fields = [constants.SF_NAME] + self.op.output_fields
5686
5687     # Never ask for node or type as it's only known to the LU
5688     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5689       while extra in fields:
5690         fields.remove(extra)
5691
5692     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5693     name_idx = field_idx[constants.SF_NAME]
5694
5695     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5696     data = self.rpc.call_storage_list(self.nodes,
5697                                       self.op.storage_type, st_args,
5698                                       self.op.name, fields)
5699
5700     result = []
5701
5702     for node in utils.NiceSort(self.nodes):
5703       nresult = data[node]
5704       if nresult.offline:
5705         continue
5706
5707       msg = nresult.fail_msg
5708       if msg:
5709         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5710         continue
5711
5712       rows = dict([(row[name_idx], row) for row in nresult.payload])
5713
5714       for name in utils.NiceSort(rows.keys()):
5715         row = rows[name]
5716
5717         out = []
5718
5719         for field in self.op.output_fields:
5720           if field == constants.SF_NODE:
5721             val = node
5722           elif field == constants.SF_TYPE:
5723             val = self.op.storage_type
5724           elif field in field_idx:
5725             val = row[field_idx[field]]
5726           else:
5727             raise errors.ParameterError(field)
5728
5729           out.append(val)
5730
5731         result.append(out)
5732
5733     return result
5734
5735
5736 class _InstanceQuery(_QueryBase):
5737   FIELDS = query.INSTANCE_FIELDS
5738
5739   def ExpandNames(self, lu):
5740     lu.needed_locks = {}
5741     lu.share_locks = _ShareAll()
5742
5743     if self.names:
5744       self.wanted = _GetWantedInstances(lu, self.names)
5745     else:
5746       self.wanted = locking.ALL_SET
5747
5748     self.do_locking = (self.use_locking and
5749                        query.IQ_LIVE in self.requested_data)
5750     if self.do_locking:
5751       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5752       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5753       lu.needed_locks[locking.LEVEL_NODE] = []
5754       lu.needed_locks[locking.LEVEL_NETWORK] = []
5755       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5756
5757     self.do_grouplocks = (self.do_locking and
5758                           query.IQ_NODES in self.requested_data)
5759
5760   def DeclareLocks(self, lu, level):
5761     if self.do_locking:
5762       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5763         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5764
5765         # Lock all groups used by instances optimistically; this requires going
5766         # via the node before it's locked, requiring verification later on
5767         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5768           set(group_uuid
5769               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5770               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5771       elif level == locking.LEVEL_NODE:
5772         lu._LockInstancesNodes() # pylint: disable=W0212
5773
5774       elif level == locking.LEVEL_NETWORK:
5775         lu.needed_locks[locking.LEVEL_NETWORK] = \
5776           frozenset(net_uuid
5777                     for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5778                     for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5779
5780   @staticmethod
5781   def _CheckGroupLocks(lu):
5782     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5783     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5784
5785     # Check if node groups for locked instances are still correct
5786     for instance_name in owned_instances:
5787       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5788
5789   def _GetQueryData(self, lu):
5790     """Computes the list of instances and their attributes.
5791
5792     """
5793     if self.do_grouplocks:
5794       self._CheckGroupLocks(lu)
5795
5796     cluster = lu.cfg.GetClusterInfo()
5797     all_info = lu.cfg.GetAllInstancesInfo()
5798
5799     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5800
5801     instance_list = [all_info[name] for name in instance_names]
5802     nodes = frozenset(itertools.chain(*(inst.all_nodes
5803                                         for inst in instance_list)))
5804     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5805     bad_nodes = []
5806     offline_nodes = []
5807     wrongnode_inst = set()
5808
5809     # Gather data as requested
5810     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5811       live_data = {}
5812       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5813       for name in nodes:
5814         result = node_data[name]
5815         if result.offline:
5816           # offline nodes will be in both lists
5817           assert result.fail_msg
5818           offline_nodes.append(name)
5819         if result.fail_msg:
5820           bad_nodes.append(name)
5821         elif result.payload:
5822           for inst in result.payload:
5823             if inst in all_info:
5824               if all_info[inst].primary_node == name:
5825                 live_data.update(result.payload)
5826               else:
5827                 wrongnode_inst.add(inst)
5828             else:
5829               # orphan instance; we don't list it here as we don't
5830               # handle this case yet in the output of instance listing
5831               logging.warning("Orphan instance '%s' found on node %s",
5832                               inst, name)
5833         # else no instance is alive
5834     else:
5835       live_data = {}
5836
5837     if query.IQ_DISKUSAGE in self.requested_data:
5838       gmi = ganeti.masterd.instance
5839       disk_usage = dict((inst.name,
5840                          gmi.ComputeDiskSize(inst.disk_template,
5841                                              [{constants.IDISK_SIZE: disk.size}
5842                                               for disk in inst.disks]))
5843                         for inst in instance_list)
5844     else:
5845       disk_usage = None
5846
5847     if query.IQ_CONSOLE in self.requested_data:
5848       consinfo = {}
5849       for inst in instance_list:
5850         if inst.name in live_data:
5851           # Instance is running
5852           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5853         else:
5854           consinfo[inst.name] = None
5855       assert set(consinfo.keys()) == set(instance_names)
5856     else:
5857       consinfo = None
5858
5859     if query.IQ_NODES in self.requested_data:
5860       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5861                                             instance_list)))
5862       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5863       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5864                     for uuid in set(map(operator.attrgetter("group"),
5865                                         nodes.values())))
5866     else:
5867       nodes = None
5868       groups = None
5869
5870     if query.IQ_NETWORKS in self.requested_data:
5871       net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5872                                     for i in instance_list))
5873       networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5874     else:
5875       networks = None
5876
5877     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5878                                    disk_usage, offline_nodes, bad_nodes,
5879                                    live_data, wrongnode_inst, consinfo,
5880                                    nodes, groups, networks)
5881
5882
5883 class LUQuery(NoHooksLU):
5884   """Query for resources/items of a certain kind.
5885
5886   """
5887   # pylint: disable=W0142
5888   REQ_BGL = False
5889
5890   def CheckArguments(self):
5891     qcls = _GetQueryImplementation(self.op.what)
5892
5893     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5894
5895   def ExpandNames(self):
5896     self.impl.ExpandNames(self)
5897
5898   def DeclareLocks(self, level):
5899     self.impl.DeclareLocks(self, level)
5900
5901   def Exec(self, feedback_fn):
5902     return self.impl.NewStyleQuery(self)
5903
5904
5905 class LUQueryFields(NoHooksLU):
5906   """Query for resources/items of a certain kind.
5907
5908   """
5909   # pylint: disable=W0142
5910   REQ_BGL = False
5911
5912   def CheckArguments(self):
5913     self.qcls = _GetQueryImplementation(self.op.what)
5914
5915   def ExpandNames(self):
5916     self.needed_locks = {}
5917
5918   def Exec(self, feedback_fn):
5919     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5920
5921
5922 class LUNodeModifyStorage(NoHooksLU):
5923   """Logical unit for modifying a storage volume on a node.
5924
5925   """
5926   REQ_BGL = False
5927
5928   def CheckArguments(self):
5929     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5930
5931     storage_type = self.op.storage_type
5932
5933     try:
5934       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5935     except KeyError:
5936       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5937                                  " modified" % storage_type,
5938                                  errors.ECODE_INVAL)
5939
5940     diff = set(self.op.changes.keys()) - modifiable
5941     if diff:
5942       raise errors.OpPrereqError("The following fields can not be modified for"
5943                                  " storage units of type '%s': %r" %
5944                                  (storage_type, list(diff)),
5945                                  errors.ECODE_INVAL)
5946
5947   def ExpandNames(self):
5948     self.needed_locks = {
5949       locking.LEVEL_NODE: self.op.node_name,
5950       }
5951
5952   def Exec(self, feedback_fn):
5953     """Computes the list of nodes and their attributes.
5954
5955     """
5956     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5957     result = self.rpc.call_storage_modify(self.op.node_name,
5958                                           self.op.storage_type, st_args,
5959                                           self.op.name, self.op.changes)
5960     result.Raise("Failed to modify storage unit '%s' on %s" %
5961                  (self.op.name, self.op.node_name))
5962
5963
5964 class LUNodeAdd(LogicalUnit):
5965   """Logical unit for adding node to the cluster.
5966
5967   """
5968   HPATH = "node-add"
5969   HTYPE = constants.HTYPE_NODE
5970   _NFLAGS = ["master_capable", "vm_capable"]
5971
5972   def CheckArguments(self):
5973     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5974     # validate/normalize the node name
5975     self.hostname = netutils.GetHostname(name=self.op.node_name,
5976                                          family=self.primary_ip_family)
5977     self.op.node_name = self.hostname.name
5978
5979     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5980       raise errors.OpPrereqError("Cannot readd the master node",
5981                                  errors.ECODE_STATE)
5982
5983     if self.op.readd and self.op.group:
5984       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5985                                  " being readded", errors.ECODE_INVAL)
5986
5987   def BuildHooksEnv(self):
5988     """Build hooks env.
5989
5990     This will run on all nodes before, and on all nodes + the new node after.
5991
5992     """
5993     return {
5994       "OP_TARGET": self.op.node_name,
5995       "NODE_NAME": self.op.node_name,
5996       "NODE_PIP": self.op.primary_ip,
5997       "NODE_SIP": self.op.secondary_ip,
5998       "MASTER_CAPABLE": str(self.op.master_capable),
5999       "VM_CAPABLE": str(self.op.vm_capable),
6000       }
6001
6002   def BuildHooksNodes(self):
6003     """Build hooks nodes.
6004
6005     """
6006     # Exclude added node
6007     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6008     post_nodes = pre_nodes + [self.op.node_name, ]
6009
6010     return (pre_nodes, post_nodes)
6011
6012   def CheckPrereq(self):
6013     """Check prerequisites.
6014
6015     This checks:
6016      - the new node is not already in the config
6017      - it is resolvable
6018      - its parameters (single/dual homed) matches the cluster
6019
6020     Any errors are signaled by raising errors.OpPrereqError.
6021
6022     """
6023     cfg = self.cfg
6024     hostname = self.hostname
6025     node = hostname.name
6026     primary_ip = self.op.primary_ip = hostname.ip
6027     if self.op.secondary_ip is None:
6028       if self.primary_ip_family == netutils.IP6Address.family:
6029         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6030                                    " IPv4 address must be given as secondary",
6031                                    errors.ECODE_INVAL)
6032       self.op.secondary_ip = primary_ip
6033
6034     secondary_ip = self.op.secondary_ip
6035     if not netutils.IP4Address.IsValid(secondary_ip):
6036       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6037                                  " address" % secondary_ip, errors.ECODE_INVAL)
6038
6039     node_list = cfg.GetNodeList()
6040     if not self.op.readd and node in node_list:
6041       raise errors.OpPrereqError("Node %s is already in the configuration" %
6042                                  node, errors.ECODE_EXISTS)
6043     elif self.op.readd and node not in node_list:
6044       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6045                                  errors.ECODE_NOENT)
6046
6047     self.changed_primary_ip = False
6048
6049     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6050       if self.op.readd and node == existing_node_name:
6051         if existing_node.secondary_ip != secondary_ip:
6052           raise errors.OpPrereqError("Readded node doesn't have the same IP"
6053                                      " address configuration as before",
6054                                      errors.ECODE_INVAL)
6055         if existing_node.primary_ip != primary_ip:
6056           self.changed_primary_ip = True
6057
6058         continue
6059
6060       if (existing_node.primary_ip == primary_ip or
6061           existing_node.secondary_ip == primary_ip or
6062           existing_node.primary_ip == secondary_ip or
6063           existing_node.secondary_ip == secondary_ip):
6064         raise errors.OpPrereqError("New node ip address(es) conflict with"
6065                                    " existing node %s" % existing_node.name,
6066                                    errors.ECODE_NOTUNIQUE)
6067
6068     # After this 'if' block, None is no longer a valid value for the
6069     # _capable op attributes
6070     if self.op.readd:
6071       old_node = self.cfg.GetNodeInfo(node)
6072       assert old_node is not None, "Can't retrieve locked node %s" % node
6073       for attr in self._NFLAGS:
6074         if getattr(self.op, attr) is None:
6075           setattr(self.op, attr, getattr(old_node, attr))
6076     else:
6077       for attr in self._NFLAGS:
6078         if getattr(self.op, attr) is None:
6079           setattr(self.op, attr, True)
6080
6081     if self.op.readd and not self.op.vm_capable:
6082       pri, sec = cfg.GetNodeInstances(node)
6083       if pri or sec:
6084         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6085                                    " flag set to false, but it already holds"
6086                                    " instances" % node,
6087                                    errors.ECODE_STATE)
6088
6089     # check that the type of the node (single versus dual homed) is the
6090     # same as for the master
6091     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092     master_singlehomed = myself.secondary_ip == myself.primary_ip
6093     newbie_singlehomed = secondary_ip == primary_ip
6094     if master_singlehomed != newbie_singlehomed:
6095       if master_singlehomed:
6096         raise errors.OpPrereqError("The master has no secondary ip but the"
6097                                    " new node has one",
6098                                    errors.ECODE_INVAL)
6099       else:
6100         raise errors.OpPrereqError("The master has a secondary ip but the"
6101                                    " new node doesn't have one",
6102                                    errors.ECODE_INVAL)
6103
6104     # checks reachability
6105     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6106       raise errors.OpPrereqError("Node not reachable by ping",
6107                                  errors.ECODE_ENVIRON)
6108
6109     if not newbie_singlehomed:
6110       # check reachability from my secondary ip to newbie's secondary ip
6111       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6112                               source=myself.secondary_ip):
6113         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6114                                    " based ping to node daemon port",
6115                                    errors.ECODE_ENVIRON)
6116
6117     if self.op.readd:
6118       exceptions = [node]
6119     else:
6120       exceptions = []
6121
6122     if self.op.master_capable:
6123       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6124     else:
6125       self.master_candidate = False
6126
6127     if self.op.readd:
6128       self.new_node = old_node
6129     else:
6130       node_group = cfg.LookupNodeGroup(self.op.group)
6131       self.new_node = objects.Node(name=node,
6132                                    primary_ip=primary_ip,
6133                                    secondary_ip=secondary_ip,
6134                                    master_candidate=self.master_candidate,
6135                                    offline=False, drained=False,
6136                                    group=node_group, ndparams={})
6137
6138     if self.op.ndparams:
6139       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6140       _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6141                             "node", "cluster or group")
6142
6143     if self.op.hv_state:
6144       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6145
6146     if self.op.disk_state:
6147       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6148
6149     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6150     #       it a property on the base class.
6151     rpcrunner = rpc.DnsOnlyRunner()
6152     result = rpcrunner.call_version([node])[node]
6153     result.Raise("Can't get version information from node %s" % node)
6154     if constants.PROTOCOL_VERSION == result.payload:
6155       logging.info("Communication to node %s fine, sw version %s match",
6156                    node, result.payload)
6157     else:
6158       raise errors.OpPrereqError("Version mismatch master version %s,"
6159                                  " node version %s" %
6160                                  (constants.PROTOCOL_VERSION, result.payload),
6161                                  errors.ECODE_ENVIRON)
6162
6163     vg_name = cfg.GetVGName()
6164     if vg_name is not None:
6165       vparams = {constants.NV_PVLIST: [vg_name]}
6166       excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6167       cname = self.cfg.GetClusterName()
6168       result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6169       (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6170       if errmsgs:
6171         raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6172                                    "; ".join(errmsgs), errors.ECODE_ENVIRON)
6173
6174   def Exec(self, feedback_fn):
6175     """Adds the new node to the cluster.
6176
6177     """
6178     new_node = self.new_node
6179     node = new_node.name
6180
6181     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6182       "Not owning BGL"
6183
6184     # We adding a new node so we assume it's powered
6185     new_node.powered = True
6186
6187     # for re-adds, reset the offline/drained/master-candidate flags;
6188     # we need to reset here, otherwise offline would prevent RPC calls
6189     # later in the procedure; this also means that if the re-add
6190     # fails, we are left with a non-offlined, broken node
6191     if self.op.readd:
6192       new_node.drained = new_node.offline = False # pylint: disable=W0201
6193       self.LogInfo("Readding a node, the offline/drained flags were reset")
6194       # if we demote the node, we do cleanup later in the procedure
6195       new_node.master_candidate = self.master_candidate
6196       if self.changed_primary_ip:
6197         new_node.primary_ip = self.op.primary_ip
6198
6199     # copy the master/vm_capable flags
6200     for attr in self._NFLAGS:
6201       setattr(new_node, attr, getattr(self.op, attr))
6202
6203     # notify the user about any possible mc promotion
6204     if new_node.master_candidate:
6205       self.LogInfo("Node will be a master candidate")
6206
6207     if self.op.ndparams:
6208       new_node.ndparams = self.op.ndparams
6209     else:
6210       new_node.ndparams = {}
6211
6212     if self.op.hv_state:
6213       new_node.hv_state_static = self.new_hv_state
6214
6215     if self.op.disk_state:
6216       new_node.disk_state_static = self.new_disk_state
6217
6218     # Add node to our /etc/hosts, and add key to known_hosts
6219     if self.cfg.GetClusterInfo().modify_etc_hosts:
6220       master_node = self.cfg.GetMasterNode()
6221       result = self.rpc.call_etc_hosts_modify(master_node,
6222                                               constants.ETC_HOSTS_ADD,
6223                                               self.hostname.name,
6224                                               self.hostname.ip)
6225       result.Raise("Can't update hosts file with new host data")
6226
6227     if new_node.secondary_ip != new_node.primary_ip:
6228       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6229                                False)
6230
6231     node_verify_list = [self.cfg.GetMasterNode()]
6232     node_verify_param = {
6233       constants.NV_NODELIST: ([node], {}),
6234       # TODO: do a node-net-test as well?
6235     }
6236
6237     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6238                                        self.cfg.GetClusterName())
6239     for verifier in node_verify_list:
6240       result[verifier].Raise("Cannot communicate with node %s" % verifier)
6241       nl_payload = result[verifier].payload[constants.NV_NODELIST]
6242       if nl_payload:
6243         for failed in nl_payload:
6244           feedback_fn("ssh/hostname verification failed"
6245                       " (checking from %s): %s" %
6246                       (verifier, nl_payload[failed]))
6247         raise errors.OpExecError("ssh/hostname verification failed")
6248
6249     if self.op.readd:
6250       _RedistributeAncillaryFiles(self)
6251       self.context.ReaddNode(new_node)
6252       # make sure we redistribute the config
6253       self.cfg.Update(new_node, feedback_fn)
6254       # and make sure the new node will not have old files around
6255       if not new_node.master_candidate:
6256         result = self.rpc.call_node_demote_from_mc(new_node.name)
6257         msg = result.fail_msg
6258         if msg:
6259           self.LogWarning("Node failed to demote itself from master"
6260                           " candidate status: %s" % msg)
6261     else:
6262       _RedistributeAncillaryFiles(self, additional_nodes=[node],
6263                                   additional_vm=self.op.vm_capable)
6264       self.context.AddNode(new_node, self.proc.GetECId())
6265
6266
6267 class LUNodeSetParams(LogicalUnit):
6268   """Modifies the parameters of a node.
6269
6270   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6271       to the node role (as _ROLE_*)
6272   @cvar _R2F: a dictionary from node role to tuples of flags
6273   @cvar _FLAGS: a list of attribute names corresponding to the flags
6274
6275   """
6276   HPATH = "node-modify"
6277   HTYPE = constants.HTYPE_NODE
6278   REQ_BGL = False
6279   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6280   _F2R = {
6281     (True, False, False): _ROLE_CANDIDATE,
6282     (False, True, False): _ROLE_DRAINED,
6283     (False, False, True): _ROLE_OFFLINE,
6284     (False, False, False): _ROLE_REGULAR,
6285     }
6286   _R2F = dict((v, k) for k, v in _F2R.items())
6287   _FLAGS = ["master_candidate", "drained", "offline"]
6288
6289   def CheckArguments(self):
6290     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6291     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6292                 self.op.master_capable, self.op.vm_capable,
6293                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6294                 self.op.disk_state]
6295     if all_mods.count(None) == len(all_mods):
6296       raise errors.OpPrereqError("Please pass at least one modification",
6297                                  errors.ECODE_INVAL)
6298     if all_mods.count(True) > 1:
6299       raise errors.OpPrereqError("Can't set the node into more than one"
6300                                  " state at the same time",
6301                                  errors.ECODE_INVAL)
6302
6303     # Boolean value that tells us whether we might be demoting from MC
6304     self.might_demote = (self.op.master_candidate is False or
6305                          self.op.offline is True or
6306                          self.op.drained is True or
6307                          self.op.master_capable is False)
6308
6309     if self.op.secondary_ip:
6310       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6311         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6312                                    " address" % self.op.secondary_ip,
6313                                    errors.ECODE_INVAL)
6314
6315     self.lock_all = self.op.auto_promote and self.might_demote
6316     self.lock_instances = self.op.secondary_ip is not None
6317
6318   def _InstanceFilter(self, instance):
6319     """Filter for getting affected instances.
6320
6321     """
6322     return (instance.disk_template in constants.DTS_INT_MIRROR and
6323             self.op.node_name in instance.all_nodes)
6324
6325   def ExpandNames(self):
6326     if self.lock_all:
6327       self.needed_locks = {
6328         locking.LEVEL_NODE: locking.ALL_SET,
6329
6330         # Block allocations when all nodes are locked
6331         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6332         }
6333     else:
6334       self.needed_locks = {
6335         locking.LEVEL_NODE: self.op.node_name,
6336         }
6337
6338     # Since modifying a node can have severe effects on currently running
6339     # operations the resource lock is at least acquired in shared mode
6340     self.needed_locks[locking.LEVEL_NODE_RES] = \
6341       self.needed_locks[locking.LEVEL_NODE]
6342
6343     # Get all locks except nodes in shared mode; they are not used for anything
6344     # but read-only access
6345     self.share_locks = _ShareAll()
6346     self.share_locks[locking.LEVEL_NODE] = 0
6347     self.share_locks[locking.LEVEL_NODE_RES] = 0
6348     self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6349
6350     if self.lock_instances:
6351       self.needed_locks[locking.LEVEL_INSTANCE] = \
6352         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6353
6354   def BuildHooksEnv(self):
6355     """Build hooks env.
6356
6357     This runs on the master node.
6358
6359     """
6360     return {
6361       "OP_TARGET": self.op.node_name,
6362       "MASTER_CANDIDATE": str(self.op.master_candidate),
6363       "OFFLINE": str(self.op.offline),
6364       "DRAINED": str(self.op.drained),
6365       "MASTER_CAPABLE": str(self.op.master_capable),
6366       "VM_CAPABLE": str(self.op.vm_capable),
6367       }
6368
6369   def BuildHooksNodes(self):
6370     """Build hooks nodes.
6371
6372     """
6373     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6374     return (nl, nl)
6375
6376   def CheckPrereq(self):
6377     """Check prerequisites.
6378
6379     This only checks the instance list against the existing names.
6380
6381     """
6382     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6383
6384     if self.lock_instances:
6385       affected_instances = \
6386         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6387
6388       # Verify instance locks
6389       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6390       wanted_instances = frozenset(affected_instances.keys())
6391       if wanted_instances - owned_instances:
6392         raise errors.OpPrereqError("Instances affected by changing node %s's"
6393                                    " secondary IP address have changed since"
6394                                    " locks were acquired, wanted '%s', have"
6395                                    " '%s'; retry the operation" %
6396                                    (self.op.node_name,
6397                                     utils.CommaJoin(wanted_instances),
6398                                     utils.CommaJoin(owned_instances)),
6399                                    errors.ECODE_STATE)
6400     else:
6401       affected_instances = None
6402
6403     if (self.op.master_candidate is not None or
6404         self.op.drained is not None or
6405         self.op.offline is not None):
6406       # we can't change the master's node flags
6407       if self.op.node_name == self.cfg.GetMasterNode():
6408         raise errors.OpPrereqError("The master role can be changed"
6409                                    " only via master-failover",
6410                                    errors.ECODE_INVAL)
6411
6412     if self.op.master_candidate and not node.master_capable:
6413       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6414                                  " it a master candidate" % node.name,
6415                                  errors.ECODE_STATE)
6416
6417     if self.op.vm_capable is False:
6418       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6419       if ipri or isec:
6420         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6421                                    " the vm_capable flag" % node.name,
6422                                    errors.ECODE_STATE)
6423
6424     if node.master_candidate and self.might_demote and not self.lock_all:
6425       assert not self.op.auto_promote, "auto_promote set but lock_all not"
6426       # check if after removing the current node, we're missing master
6427       # candidates
6428       (mc_remaining, mc_should, _) = \
6429           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6430       if mc_remaining < mc_should:
6431         raise errors.OpPrereqError("Not enough master candidates, please"
6432                                    " pass auto promote option to allow"
6433                                    " promotion (--auto-promote or RAPI"
6434                                    " auto_promote=True)", errors.ECODE_STATE)
6435
6436     self.old_flags = old_flags = (node.master_candidate,
6437                                   node.drained, node.offline)
6438     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6439     self.old_role = old_role = self._F2R[old_flags]
6440
6441     # Check for ineffective changes
6442     for attr in self._FLAGS:
6443       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6444         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6445         setattr(self.op, attr, None)
6446
6447     # Past this point, any flag change to False means a transition
6448     # away from the respective state, as only real changes are kept
6449
6450     # TODO: We might query the real power state if it supports OOB
6451     if _SupportsOob(self.cfg, node):
6452       if self.op.offline is False and not (node.powered or
6453                                            self.op.powered is True):
6454         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6455                                     " offline status can be reset") %
6456                                    self.op.node_name, errors.ECODE_STATE)
6457     elif self.op.powered is not None:
6458       raise errors.OpPrereqError(("Unable to change powered state for node %s"
6459                                   " as it does not support out-of-band"
6460                                   " handling") % self.op.node_name,
6461                                  errors.ECODE_STATE)
6462
6463     # If we're being deofflined/drained, we'll MC ourself if needed
6464     if (self.op.drained is False or self.op.offline is False or
6465         (self.op.master_capable and not node.master_capable)):
6466       if _DecideSelfPromotion(self):
6467         self.op.master_candidate = True
6468         self.LogInfo("Auto-promoting node to master candidate")
6469
6470     # If we're no longer master capable, we'll demote ourselves from MC
6471     if self.op.master_capable is False and node.master_candidate:
6472       self.LogInfo("Demoting from master candidate")
6473       self.op.master_candidate = False
6474
6475     # Compute new role
6476     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6477     if self.op.master_candidate:
6478       new_role = self._ROLE_CANDIDATE
6479     elif self.op.drained:
6480       new_role = self._ROLE_DRAINED
6481     elif self.op.offline:
6482       new_role = self._ROLE_OFFLINE
6483     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6484       # False is still in new flags, which means we're un-setting (the
6485       # only) True flag
6486       new_role = self._ROLE_REGULAR
6487     else: # no new flags, nothing, keep old role
6488       new_role = old_role
6489
6490     self.new_role = new_role
6491
6492     if old_role == self._ROLE_OFFLINE and new_role != old_role:
6493       # Trying to transition out of offline status
6494       result = self.rpc.call_version([node.name])[node.name]
6495       if result.fail_msg:
6496         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6497                                    " to report its version: %s" %
6498                                    (node.name, result.fail_msg),
6499                                    errors.ECODE_STATE)
6500       else:
6501         self.LogWarning("Transitioning node from offline to online state"
6502                         " without using re-add. Please make sure the node"
6503                         " is healthy!")
6504
6505     # When changing the secondary ip, verify if this is a single-homed to
6506     # multi-homed transition or vice versa, and apply the relevant
6507     # restrictions.
6508     if self.op.secondary_ip:
6509       # Ok even without locking, because this can't be changed by any LU
6510       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6511       master_singlehomed = master.secondary_ip == master.primary_ip
6512       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6513         if self.op.force and node.name == master.name:
6514           self.LogWarning("Transitioning from single-homed to multi-homed"
6515                           " cluster; all nodes will require a secondary IP"
6516                           " address")
6517         else:
6518           raise errors.OpPrereqError("Changing the secondary ip on a"
6519                                      " single-homed cluster requires the"
6520                                      " --force option to be passed, and the"
6521                                      " target node to be the master",
6522                                      errors.ECODE_INVAL)
6523       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6524         if self.op.force and node.name == master.name:
6525           self.LogWarning("Transitioning from multi-homed to single-homed"
6526                           " cluster; secondary IP addresses will have to be"
6527                           " removed")
6528         else:
6529           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6530                                      " same as the primary IP on a multi-homed"
6531                                      " cluster, unless the --force option is"
6532                                      " passed, and the target node is the"
6533                                      " master", errors.ECODE_INVAL)
6534
6535       assert not (frozenset(affected_instances) -
6536                   self.owned_locks(locking.LEVEL_INSTANCE))
6537
6538       if node.offline:
6539         if affected_instances:
6540           msg = ("Cannot change secondary IP address: offline node has"
6541                  " instances (%s) configured to use it" %
6542                  utils.CommaJoin(affected_instances.keys()))
6543           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6544       else:
6545         # On online nodes, check that no instances are running, and that
6546         # the node has the new ip and we can reach it.
6547         for instance in affected_instances.values():
6548           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6549                               msg="cannot change secondary ip")
6550
6551         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6552         if master.name != node.name:
6553           # check reachability from master secondary ip to new secondary ip
6554           if not netutils.TcpPing(self.op.secondary_ip,
6555                                   constants.DEFAULT_NODED_PORT,
6556                                   source=master.secondary_ip):
6557             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6558                                        " based ping to node daemon port",
6559                                        errors.ECODE_ENVIRON)
6560
6561     if self.op.ndparams:
6562       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6563       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6564       _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6565                             "node", "cluster or group")
6566       self.new_ndparams = new_ndparams
6567
6568     if self.op.hv_state:
6569       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6570                                                  self.node.hv_state_static)
6571
6572     if self.op.disk_state:
6573       self.new_disk_state = \
6574         _MergeAndVerifyDiskState(self.op.disk_state,
6575                                  self.node.disk_state_static)
6576
6577   def Exec(self, feedback_fn):
6578     """Modifies a node.
6579
6580     """
6581     node = self.node
6582     old_role = self.old_role
6583     new_role = self.new_role
6584
6585     result = []
6586
6587     if self.op.ndparams:
6588       node.ndparams = self.new_ndparams
6589
6590     if self.op.powered is not None:
6591       node.powered = self.op.powered
6592
6593     if self.op.hv_state:
6594       node.hv_state_static = self.new_hv_state
6595
6596     if self.op.disk_state:
6597       node.disk_state_static = self.new_disk_state
6598
6599     for attr in ["master_capable", "vm_capable"]:
6600       val = getattr(self.op, attr)
6601       if val is not None:
6602         setattr(node, attr, val)
6603         result.append((attr, str(val)))
6604
6605     if new_role != old_role:
6606       # Tell the node to demote itself, if no longer MC and not offline
6607       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6608         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6609         if msg:
6610           self.LogWarning("Node failed to demote itself: %s", msg)
6611
6612       new_flags = self._R2F[new_role]
6613       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6614         if of != nf:
6615           result.append((desc, str(nf)))
6616       (node.master_candidate, node.drained, node.offline) = new_flags
6617
6618       # we locked all nodes, we adjust the CP before updating this node
6619       if self.lock_all:
6620         _AdjustCandidatePool(self, [node.name])
6621
6622     if self.op.secondary_ip:
6623       node.secondary_ip = self.op.secondary_ip
6624       result.append(("secondary_ip", self.op.secondary_ip))
6625
6626     # this will trigger configuration file update, if needed
6627     self.cfg.Update(node, feedback_fn)
6628
6629     # this will trigger job queue propagation or cleanup if the mc
6630     # flag changed
6631     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6632       self.context.ReaddNode(node)
6633
6634     return result
6635
6636
6637 class LUNodePowercycle(NoHooksLU):
6638   """Powercycles a node.
6639
6640   """
6641   REQ_BGL = False
6642
6643   def CheckArguments(self):
6644     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6645     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6646       raise errors.OpPrereqError("The node is the master and the force"
6647                                  " parameter was not set",
6648                                  errors.ECODE_INVAL)
6649
6650   def ExpandNames(self):
6651     """Locking for PowercycleNode.
6652
6653     This is a last-resort option and shouldn't block on other
6654     jobs. Therefore, we grab no locks.
6655
6656     """
6657     self.needed_locks = {}
6658
6659   def Exec(self, feedback_fn):
6660     """Reboots a node.
6661
6662     """
6663     result = self.rpc.call_node_powercycle(self.op.node_name,
6664                                            self.cfg.GetHypervisorType())
6665     result.Raise("Failed to schedule the reboot")
6666     return result.payload
6667
6668
6669 class LUClusterQuery(NoHooksLU):
6670   """Query cluster configuration.
6671
6672   """
6673   REQ_BGL = False
6674
6675   def ExpandNames(self):
6676     self.needed_locks = {}
6677
6678   def Exec(self, feedback_fn):
6679     """Return cluster config.
6680
6681     """
6682     cluster = self.cfg.GetClusterInfo()
6683     os_hvp = {}
6684
6685     # Filter just for enabled hypervisors
6686     for os_name, hv_dict in cluster.os_hvp.items():
6687       os_hvp[os_name] = {}
6688       for hv_name, hv_params in hv_dict.items():
6689         if hv_name in cluster.enabled_hypervisors:
6690           os_hvp[os_name][hv_name] = hv_params
6691
6692     # Convert ip_family to ip_version
6693     primary_ip_version = constants.IP4_VERSION
6694     if cluster.primary_ip_family == netutils.IP6Address.family:
6695       primary_ip_version = constants.IP6_VERSION
6696
6697     result = {
6698       "software_version": constants.RELEASE_VERSION,
6699       "protocol_version": constants.PROTOCOL_VERSION,
6700       "config_version": constants.CONFIG_VERSION,
6701       "os_api_version": max(constants.OS_API_VERSIONS),
6702       "export_version": constants.EXPORT_VERSION,
6703       "architecture": runtime.GetArchInfo(),
6704       "name": cluster.cluster_name,
6705       "master": cluster.master_node,
6706       "default_hypervisor": cluster.primary_hypervisor,
6707       "enabled_hypervisors": cluster.enabled_hypervisors,
6708       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6709                         for hypervisor_name in cluster.enabled_hypervisors]),
6710       "os_hvp": os_hvp,
6711       "beparams": cluster.beparams,
6712       "osparams": cluster.osparams,
6713       "ipolicy": cluster.ipolicy,
6714       "nicparams": cluster.nicparams,
6715       "ndparams": cluster.ndparams,
6716       "diskparams": cluster.diskparams,
6717       "candidate_pool_size": cluster.candidate_pool_size,
6718       "master_netdev": cluster.master_netdev,
6719       "master_netmask": cluster.master_netmask,
6720       "use_external_mip_script": cluster.use_external_mip_script,
6721       "volume_group_name": cluster.volume_group_name,
6722       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6723       "file_storage_dir": cluster.file_storage_dir,
6724       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6725       "maintain_node_health": cluster.maintain_node_health,
6726       "ctime": cluster.ctime,
6727       "mtime": cluster.mtime,
6728       "uuid": cluster.uuid,
6729       "tags": list(cluster.GetTags()),
6730       "uid_pool": cluster.uid_pool,
6731       "default_iallocator": cluster.default_iallocator,
6732       "reserved_lvs": cluster.reserved_lvs,
6733       "primary_ip_version": primary_ip_version,
6734       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6735       "hidden_os": cluster.hidden_os,
6736       "blacklisted_os": cluster.blacklisted_os,
6737       }
6738
6739     return result
6740
6741
6742 class LUClusterConfigQuery(NoHooksLU):
6743   """Return configuration values.
6744
6745   """
6746   REQ_BGL = False
6747
6748   def CheckArguments(self):
6749     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6750
6751   def ExpandNames(self):
6752     self.cq.ExpandNames(self)
6753
6754   def DeclareLocks(self, level):
6755     self.cq.DeclareLocks(self, level)
6756
6757   def Exec(self, feedback_fn):
6758     result = self.cq.OldStyleQuery(self)
6759
6760     assert len(result) == 1
6761
6762     return result[0]
6763
6764
6765 class _ClusterQuery(_QueryBase):
6766   FIELDS = query.CLUSTER_FIELDS
6767
6768   #: Do not sort (there is only one item)
6769   SORT_FIELD = None
6770
6771   def ExpandNames(self, lu):
6772     lu.needed_locks = {}
6773
6774     # The following variables interact with _QueryBase._GetNames
6775     self.wanted = locking.ALL_SET
6776     self.do_locking = self.use_locking
6777
6778     if self.do_locking:
6779       raise errors.OpPrereqError("Can not use locking for cluster queries",
6780                                  errors.ECODE_INVAL)
6781
6782   def DeclareLocks(self, lu, level):
6783     pass
6784
6785   def _GetQueryData(self, lu):
6786     """Computes the list of nodes and their attributes.
6787
6788     """
6789     # Locking is not used
6790     assert not (compat.any(lu.glm.is_owned(level)
6791                            for level in locking.LEVELS
6792                            if level != locking.LEVEL_CLUSTER) or
6793                 self.do_locking or self.use_locking)
6794
6795     if query.CQ_CONFIG in self.requested_data:
6796       cluster = lu.cfg.GetClusterInfo()
6797     else:
6798       cluster = NotImplemented
6799
6800     if query.CQ_QUEUE_DRAINED in self.requested_data:
6801       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6802     else:
6803       drain_flag = NotImplemented
6804
6805     if query.CQ_WATCHER_PAUSE in self.requested_data:
6806       master_name = lu.cfg.GetMasterNode()
6807
6808       result = lu.rpc.call_get_watcher_pause(master_name)
6809       result.Raise("Can't retrieve watcher pause from master node '%s'" %
6810                    master_name)
6811
6812       watcher_pause = result.payload
6813     else:
6814       watcher_pause = NotImplemented
6815
6816     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6817
6818
6819 class LUInstanceActivateDisks(NoHooksLU):
6820   """Bring up an instance's disks.
6821
6822   """
6823   REQ_BGL = False
6824
6825   def ExpandNames(self):
6826     self._ExpandAndLockInstance()
6827     self.needed_locks[locking.LEVEL_NODE] = []
6828     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6829
6830   def DeclareLocks(self, level):
6831     if level == locking.LEVEL_NODE:
6832       self._LockInstancesNodes()
6833
6834   def CheckPrereq(self):
6835     """Check prerequisites.
6836
6837     This checks that the instance is in the cluster.
6838
6839     """
6840     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841     assert self.instance is not None, \
6842       "Cannot retrieve locked instance %s" % self.op.instance_name
6843     _CheckNodeOnline(self, self.instance.primary_node)
6844
6845   def Exec(self, feedback_fn):
6846     """Activate the disks.
6847
6848     """
6849     disks_ok, disks_info = \
6850               _AssembleInstanceDisks(self, self.instance,
6851                                      ignore_size=self.op.ignore_size)
6852     if not disks_ok:
6853       raise errors.OpExecError("Cannot activate block devices")
6854
6855     if self.op.wait_for_sync:
6856       if not _WaitForSync(self, self.instance):
6857         raise errors.OpExecError("Some disks of the instance are degraded!")
6858
6859     return disks_info
6860
6861
6862 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6863                            ignore_size=False):
6864   """Prepare the block devices for an instance.
6865
6866   This sets up the block devices on all nodes.
6867
6868   @type lu: L{LogicalUnit}
6869   @param lu: the logical unit on whose behalf we execute
6870   @type instance: L{objects.Instance}
6871   @param instance: the instance for whose disks we assemble
6872   @type disks: list of L{objects.Disk} or None
6873   @param disks: which disks to assemble (or all, if None)
6874   @type ignore_secondaries: boolean
6875   @param ignore_secondaries: if true, errors on secondary nodes
6876       won't result in an error return from the function
6877   @type ignore_size: boolean
6878   @param ignore_size: if true, the current known size of the disk
6879       will not be used during the disk activation, useful for cases
6880       when the size is wrong
6881   @return: False if the operation failed, otherwise a list of
6882       (host, instance_visible_name, node_visible_name)
6883       with the mapping from node devices to instance devices
6884
6885   """
6886   device_info = []
6887   disks_ok = True
6888   iname = instance.name
6889   disks = _ExpandCheckDisks(instance, disks)
6890
6891   # With the two passes mechanism we try to reduce the window of
6892   # opportunity for the race condition of switching DRBD to primary
6893   # before handshaking occured, but we do not eliminate it
6894
6895   # The proper fix would be to wait (with some limits) until the
6896   # connection has been made and drbd transitions from WFConnection
6897   # into any other network-connected state (Connected, SyncTarget,
6898   # SyncSource, etc.)
6899
6900   # 1st pass, assemble on all nodes in secondary mode
6901   for idx, inst_disk in enumerate(disks):
6902     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6903       if ignore_size:
6904         node_disk = node_disk.Copy()
6905         node_disk.UnsetSize()
6906       lu.cfg.SetDiskID(node_disk, node)
6907       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6908                                              False, idx)
6909       msg = result.fail_msg
6910       if msg:
6911         is_offline_secondary = (node in instance.secondary_nodes and
6912                                 result.offline)
6913         lu.LogWarning("Could not prepare block device %s on node %s"
6914                       " (is_primary=False, pass=1): %s",
6915                       inst_disk.iv_name, node, msg)
6916         if not (ignore_secondaries or is_offline_secondary):
6917           disks_ok = False
6918
6919   # FIXME: race condition on drbd migration to primary
6920
6921   # 2nd pass, do only the primary node
6922   for idx, inst_disk in enumerate(disks):
6923     dev_path = None
6924
6925     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6926       if node != instance.primary_node:
6927         continue
6928       if ignore_size:
6929         node_disk = node_disk.Copy()
6930         node_disk.UnsetSize()
6931       lu.cfg.SetDiskID(node_disk, node)
6932       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6933                                              True, idx)
6934       msg = result.fail_msg
6935       if msg:
6936         lu.LogWarning("Could not prepare block device %s on node %s"
6937                       " (is_primary=True, pass=2): %s",
6938                       inst_disk.iv_name, node, msg)
6939         disks_ok = False
6940       else:
6941         dev_path = result.payload
6942
6943     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6944
6945   # leave the disks configured for the primary node
6946   # this is a workaround that would be fixed better by
6947   # improving the logical/physical id handling
6948   for disk in disks:
6949     lu.cfg.SetDiskID(disk, instance.primary_node)
6950
6951   return disks_ok, device_info
6952
6953
6954 def _StartInstanceDisks(lu, instance, force):
6955   """Start the disks of an instance.
6956
6957   """
6958   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6959                                            ignore_secondaries=force)
6960   if not disks_ok:
6961     _ShutdownInstanceDisks(lu, instance)
6962     if force is not None and not force:
6963       lu.LogWarning("",
6964                     hint=("If the message above refers to a secondary node,"
6965                           " you can retry the operation using '--force'"))
6966     raise errors.OpExecError("Disk consistency error")
6967
6968
6969 class LUInstanceDeactivateDisks(NoHooksLU):
6970   """Shutdown an instance's disks.
6971
6972   """
6973   REQ_BGL = False
6974
6975   def ExpandNames(self):
6976     self._ExpandAndLockInstance()
6977     self.needed_locks[locking.LEVEL_NODE] = []
6978     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6979
6980   def DeclareLocks(self, level):
6981     if level == locking.LEVEL_NODE:
6982       self._LockInstancesNodes()
6983
6984   def CheckPrereq(self):
6985     """Check prerequisites.
6986
6987     This checks that the instance is in the cluster.
6988
6989     """
6990     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6991     assert self.instance is not None, \
6992       "Cannot retrieve locked instance %s" % self.op.instance_name
6993
6994   def Exec(self, feedback_fn):
6995     """Deactivate the disks
6996
6997     """
6998     instance = self.instance
6999     if self.op.force:
7000       _ShutdownInstanceDisks(self, instance)
7001     else:
7002       _SafeShutdownInstanceDisks(self, instance)
7003
7004
7005 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7006   """Shutdown block devices of an instance.
7007
7008   This function checks if an instance is running, before calling
7009   _ShutdownInstanceDisks.
7010
7011   """
7012   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7013   _ShutdownInstanceDisks(lu, instance, disks=disks)
7014
7015
7016 def _ExpandCheckDisks(instance, disks):
7017   """Return the instance disks selected by the disks list
7018
7019   @type disks: list of L{objects.Disk} or None
7020   @param disks: selected disks
7021   @rtype: list of L{objects.Disk}
7022   @return: selected instance disks to act on
7023
7024   """
7025   if disks is None:
7026     return instance.disks
7027   else:
7028     if not set(disks).issubset(instance.disks):
7029       raise errors.ProgrammerError("Can only act on disks belonging to the"
7030                                    " target instance")
7031     return disks
7032
7033
7034 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7035   """Shutdown block devices of an instance.
7036
7037   This does the shutdown on all nodes of the instance.
7038
7039   If the ignore_primary is false, errors on the primary node are
7040   ignored.
7041
7042   """
7043   all_result = True
7044   disks = _ExpandCheckDisks(instance, disks)
7045
7046   for disk in disks:
7047     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7048       lu.cfg.SetDiskID(top_disk, node)
7049       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7050       msg = result.fail_msg
7051       if msg:
7052         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7053                       disk.iv_name, node, msg)
7054         if ((node == instance.primary_node and not ignore_primary) or
7055             (node != instance.primary_node and not result.offline)):
7056           all_result = False
7057   return all_result
7058
7059
7060 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7061   """Checks if a node has enough free memory.
7062
7063   This function checks if a given node has the needed amount of free
7064   memory. In case the node has less memory or we cannot get the
7065   information from the node, this function raises an OpPrereqError
7066   exception.
7067
7068   @type lu: C{LogicalUnit}
7069   @param lu: a logical unit from which we get configuration data
7070   @type node: C{str}
7071   @param node: the node to check
7072   @type reason: C{str}
7073   @param reason: string to use in the error message
7074   @type requested: C{int}
7075   @param requested: the amount of memory in MiB to check for
7076   @type hypervisor_name: C{str}
7077   @param hypervisor_name: the hypervisor to ask for memory stats
7078   @rtype: integer
7079   @return: node current free memory
7080   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7081       we cannot check the node
7082
7083   """
7084   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7085   nodeinfo[node].Raise("Can't get data from node %s" % node,
7086                        prereq=True, ecode=errors.ECODE_ENVIRON)
7087   (_, _, (hv_info, )) = nodeinfo[node].payload
7088
7089   free_mem = hv_info.get("memory_free", None)
7090   if not isinstance(free_mem, int):
7091     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7092                                " was '%s'" % (node, free_mem),
7093                                errors.ECODE_ENVIRON)
7094   if requested > free_mem:
7095     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7096                                " needed %s MiB, available %s MiB" %
7097                                (node, reason, requested, free_mem),
7098                                errors.ECODE_NORES)
7099   return free_mem
7100
7101
7102 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7103   """Checks if nodes have enough free disk space in all the VGs.
7104
7105   This function checks if all given nodes have the needed amount of
7106   free disk. In case any node has less disk or we cannot get the
7107   information from the node, this function raises an OpPrereqError
7108   exception.
7109
7110   @type lu: C{LogicalUnit}
7111   @param lu: a logical unit from which we get configuration data
7112   @type nodenames: C{list}
7113   @param nodenames: the list of node names to check
7114   @type req_sizes: C{dict}
7115   @param req_sizes: the hash of vg and corresponding amount of disk in
7116       MiB to check for
7117   @raise errors.OpPrereqError: if the node doesn't have enough disk,
7118       or we cannot check the node
7119
7120   """
7121   for vg, req_size in req_sizes.items():
7122     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7123
7124
7125 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7126   """Checks if nodes have enough free disk space in the specified VG.
7127
7128   This function checks if all given nodes have the needed amount of
7129   free disk. In case any node has less disk or we cannot get the
7130   information from the node, this function raises an OpPrereqError
7131   exception.
7132
7133   @type lu: C{LogicalUnit}
7134   @param lu: a logical unit from which we get configuration data
7135   @type nodenames: C{list}
7136   @param nodenames: the list of node names to check
7137   @type vg: C{str}
7138   @param vg: the volume group to check
7139   @type requested: C{int}
7140   @param requested: the amount of disk in MiB to check for
7141   @raise errors.OpPrereqError: if the node doesn't have enough disk,
7142       or we cannot check the node
7143
7144   """
7145   es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7146   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7147   for node in nodenames:
7148     info = nodeinfo[node]
7149     info.Raise("Cannot get current information from node %s" % node,
7150                prereq=True, ecode=errors.ECODE_ENVIRON)
7151     (_, (vg_info, ), _) = info.payload
7152     vg_free = vg_info.get("vg_free", None)
7153     if not isinstance(vg_free, int):
7154       raise errors.OpPrereqError("Can't compute free disk space on node"
7155                                  " %s for vg %s, result was '%s'" %
7156                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
7157     if requested > vg_free:
7158       raise errors.OpPrereqError("Not enough disk space on target node %s"
7159                                  " vg %s: required %d MiB, available %d MiB" %
7160                                  (node, vg, requested, vg_free),
7161                                  errors.ECODE_NORES)
7162
7163
7164 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7165   """Checks if nodes have enough physical CPUs
7166
7167   This function checks if all given nodes have the needed number of
7168   physical CPUs. In case any node has less CPUs or we cannot get the
7169   information from the node, this function raises an OpPrereqError
7170   exception.
7171
7172   @type lu: C{LogicalUnit}
7173   @param lu: a logical unit from which we get configuration data
7174   @type nodenames: C{list}
7175   @param nodenames: the list of node names to check
7176   @type requested: C{int}
7177   @param requested: the minimum acceptable number of physical CPUs
7178   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7179       or we cannot check the node
7180
7181   """
7182   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7183   for node in nodenames:
7184     info = nodeinfo[node]
7185     info.Raise("Cannot get current information from node %s" % node,
7186                prereq=True, ecode=errors.ECODE_ENVIRON)
7187     (_, _, (hv_info, )) = info.payload
7188     num_cpus = hv_info.get("cpu_total", None)
7189     if not isinstance(num_cpus, int):
7190       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7191                                  " on node %s, result was '%s'" %
7192                                  (node, num_cpus), errors.ECODE_ENVIRON)
7193     if requested > num_cpus:
7194       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7195                                  "required" % (node, num_cpus, requested),
7196                                  errors.ECODE_NORES)
7197
7198
7199 class LUInstanceStartup(LogicalUnit):
7200   """Starts an instance.
7201
7202   """
7203   HPATH = "instance-start"
7204   HTYPE = constants.HTYPE_INSTANCE
7205   REQ_BGL = False
7206
7207   def CheckArguments(self):
7208     # extra beparams
7209     if self.op.beparams:
7210       # fill the beparams dict
7211       objects.UpgradeBeParams(self.op.beparams)
7212       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7213
7214   def ExpandNames(self):
7215     self._ExpandAndLockInstance()
7216     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7217
7218   def DeclareLocks(self, level):
7219     if level == locking.LEVEL_NODE_RES:
7220       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7221
7222   def BuildHooksEnv(self):
7223     """Build hooks env.
7224
7225     This runs on master, primary and secondary nodes of the instance.
7226
7227     """
7228     env = {
7229       "FORCE": self.op.force,
7230       }
7231
7232     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7233
7234     return env
7235
7236   def BuildHooksNodes(self):
7237     """Build hooks nodes.
7238
7239     """
7240     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7241     return (nl, nl)
7242
7243   def CheckPrereq(self):
7244     """Check prerequisites.
7245
7246     This checks that the instance is in the cluster.
7247
7248     """
7249     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7250     assert self.instance is not None, \
7251       "Cannot retrieve locked instance %s" % self.op.instance_name
7252
7253     # extra hvparams
7254     if self.op.hvparams:
7255       # check hypervisor parameter syntax (locally)
7256       cluster = self.cfg.GetClusterInfo()
7257       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7258       filled_hvp = cluster.FillHV(instance)
7259       filled_hvp.update(self.op.hvparams)
7260       hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7261       hv_type.CheckParameterSyntax(filled_hvp)
7262       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7263
7264     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7265
7266     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7267
7268     if self.primary_offline and self.op.ignore_offline_nodes:
7269       self.LogWarning("Ignoring offline primary node")
7270
7271       if self.op.hvparams or self.op.beparams:
7272         self.LogWarning("Overridden parameters are ignored")
7273     else:
7274       _CheckNodeOnline(self, instance.primary_node)
7275
7276       bep = self.cfg.GetClusterInfo().FillBE(instance)
7277       bep.update(self.op.beparams)
7278
7279       # check bridges existence
7280       _CheckInstanceBridgesExist(self, instance)
7281
7282       remote_info = self.rpc.call_instance_info(instance.primary_node,
7283                                                 instance.name,
7284                                                 instance.hypervisor)
7285       remote_info.Raise("Error checking node %s" % instance.primary_node,
7286                         prereq=True, ecode=errors.ECODE_ENVIRON)
7287       if not remote_info.payload: # not running already
7288         _CheckNodeFreeMemory(self, instance.primary_node,
7289                              "starting instance %s" % instance.name,
7290                              bep[constants.BE_MINMEM], instance.hypervisor)
7291
7292   def Exec(self, feedback_fn):
7293     """Start the instance.
7294
7295     """
7296     instance = self.instance
7297     force = self.op.force
7298
7299     if not self.op.no_remember:
7300       self.cfg.MarkInstanceUp(instance.name)
7301
7302     if self.primary_offline:
7303       assert self.op.ignore_offline_nodes
7304       self.LogInfo("Primary node offline, marked instance as started")
7305     else:
7306       node_current = instance.primary_node
7307
7308       _StartInstanceDisks(self, instance, force)
7309
7310       result = \
7311         self.rpc.call_instance_start(node_current,
7312                                      (instance, self.op.hvparams,
7313                                       self.op.beparams),
7314                                      self.op.startup_paused)
7315       msg = result.fail_msg
7316       if msg:
7317         _ShutdownInstanceDisks(self, instance)
7318         raise errors.OpExecError("Could not start instance: %s" % msg)
7319
7320
7321 class LUInstanceReboot(LogicalUnit):
7322   """Reboot an instance.
7323
7324   """
7325   HPATH = "instance-reboot"
7326   HTYPE = constants.HTYPE_INSTANCE
7327   REQ_BGL = False
7328
7329   def ExpandNames(self):
7330     self._ExpandAndLockInstance()
7331
7332   def BuildHooksEnv(self):
7333     """Build hooks env.
7334
7335     This runs on master, primary and secondary nodes of the instance.
7336
7337     """
7338     env = {
7339       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7340       "REBOOT_TYPE": self.op.reboot_type,
7341       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7342       }
7343
7344     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7345
7346     return env
7347
7348   def BuildHooksNodes(self):
7349     """Build hooks nodes.
7350
7351     """
7352     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7353     return (nl, nl)
7354
7355   def CheckPrereq(self):
7356     """Check prerequisites.
7357
7358     This checks that the instance is in the cluster.
7359
7360     """
7361     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7362     assert self.instance is not None, \
7363       "Cannot retrieve locked instance %s" % self.op.instance_name
7364     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7365     _CheckNodeOnline(self, instance.primary_node)
7366
7367     # check bridges existence
7368     _CheckInstanceBridgesExist(self, instance)
7369
7370   def Exec(self, feedback_fn):
7371     """Reboot the instance.
7372
7373     """
7374     instance = self.instance
7375     ignore_secondaries = self.op.ignore_secondaries
7376     reboot_type = self.op.reboot_type
7377
7378     remote_info = self.rpc.call_instance_info(instance.primary_node,
7379                                               instance.name,
7380                                               instance.hypervisor)
7381     remote_info.Raise("Error checking node %s" % instance.primary_node)
7382     instance_running = bool(remote_info.payload)
7383
7384     node_current = instance.primary_node
7385
7386     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7387                                             constants.INSTANCE_REBOOT_HARD]:
7388       for disk in instance.disks:
7389         self.cfg.SetDiskID(disk, node_current)
7390       result = self.rpc.call_instance_reboot(node_current, instance,
7391                                              reboot_type,
7392                                              self.op.shutdown_timeout)
7393       result.Raise("Could not reboot instance")
7394     else:
7395       if instance_running:
7396         result = self.rpc.call_instance_shutdown(node_current, instance,
7397                                                  self.op.shutdown_timeout)
7398         result.Raise("Could not shutdown instance for full reboot")
7399         _ShutdownInstanceDisks(self, instance)
7400       else:
7401         self.LogInfo("Instance %s was already stopped, starting now",
7402                      instance.name)
7403       _StartInstanceDisks(self, instance, ignore_secondaries)
7404       result = self.rpc.call_instance_start(node_current,
7405                                             (instance, None, None), False)
7406       msg = result.fail_msg
7407       if msg:
7408         _ShutdownInstanceDisks(self, instance)
7409         raise errors.OpExecError("Could not start instance for"
7410                                  " full reboot: %s" % msg)
7411
7412     self.cfg.MarkInstanceUp(instance.name)
7413
7414
7415 class LUInstanceShutdown(LogicalUnit):
7416   """Shutdown an instance.
7417
7418   """
7419   HPATH = "instance-stop"
7420   HTYPE = constants.HTYPE_INSTANCE
7421   REQ_BGL = False
7422
7423   def ExpandNames(self):
7424     self._ExpandAndLockInstance()
7425
7426   def BuildHooksEnv(self):
7427     """Build hooks env.
7428
7429     This runs on master, primary and secondary nodes of the instance.
7430
7431     """
7432     env = _BuildInstanceHookEnvByObject(self, self.instance)
7433     env["TIMEOUT"] = self.op.timeout
7434     return env
7435
7436   def BuildHooksNodes(self):
7437     """Build hooks nodes.
7438
7439     """
7440     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7441     return (nl, nl)
7442
7443   def CheckPrereq(self):
7444     """Check prerequisites.
7445
7446     This checks that the instance is in the cluster.
7447
7448     """
7449     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7450     assert self.instance is not None, \
7451       "Cannot retrieve locked instance %s" % self.op.instance_name
7452
7453     if not self.op.force:
7454       _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7455     else:
7456       self.LogWarning("Ignoring offline instance check")
7457
7458     self.primary_offline = \
7459       self.cfg.GetNodeInfo(self.instance.primary_node).offline
7460
7461     if self.primary_offline and self.op.ignore_offline_nodes:
7462       self.LogWarning("Ignoring offline primary node")
7463     else:
7464       _CheckNodeOnline(self, self.instance.primary_node)
7465
7466   def Exec(self, feedback_fn):
7467     """Shutdown the instance.
7468
7469     """
7470     instance = self.instance
7471     node_current = instance.primary_node
7472     timeout = self.op.timeout
7473
7474     # If the instance is offline we shouldn't mark it as down, as that
7475     # resets the offline flag.
7476     if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7477       self.cfg.MarkInstanceDown(instance.name)
7478
7479     if self.primary_offline:
7480       assert self.op.ignore_offline_nodes
7481       self.LogInfo("Primary node offline, marked instance as stopped")
7482     else:
7483       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7484       msg = result.fail_msg
7485       if msg:
7486         self.LogWarning("Could not shutdown instance: %s", msg)
7487
7488       _ShutdownInstanceDisks(self, instance)
7489
7490
7491 class LUInstanceReinstall(LogicalUnit):
7492   """Reinstall an instance.
7493
7494   """
7495   HPATH = "instance-reinstall"
7496   HTYPE = constants.HTYPE_INSTANCE
7497   REQ_BGL = False
7498
7499   def ExpandNames(self):
7500     self._ExpandAndLockInstance()
7501
7502   def BuildHooksEnv(self):
7503     """Build hooks env.
7504
7505     This runs on master, primary and secondary nodes of the instance.
7506
7507     """
7508     return _BuildInstanceHookEnvByObject(self, self.instance)
7509
7510   def BuildHooksNodes(self):
7511     """Build hooks nodes.
7512
7513     """
7514     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7515     return (nl, nl)
7516
7517   def CheckPrereq(self):
7518     """Check prerequisites.
7519
7520     This checks that the instance is in the cluster and is not running.
7521
7522     """
7523     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7524     assert instance is not None, \
7525       "Cannot retrieve locked instance %s" % self.op.instance_name
7526     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7527                      " offline, cannot reinstall")
7528
7529     if instance.disk_template == constants.DT_DISKLESS:
7530       raise errors.OpPrereqError("Instance '%s' has no disks" %
7531                                  self.op.instance_name,
7532                                  errors.ECODE_INVAL)
7533     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7534
7535     if self.op.os_type is not None:
7536       # OS verification
7537       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7538       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7539       instance_os = self.op.os_type
7540     else:
7541       instance_os = instance.os
7542
7543     nodelist = list(instance.all_nodes)
7544
7545     if self.op.osparams:
7546       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7547       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7548       self.os_inst = i_osdict # the new dict (without defaults)
7549     else:
7550       self.os_inst = None
7551
7552     self.instance = instance
7553
7554   def Exec(self, feedback_fn):
7555     """Reinstall the instance.
7556
7557     """
7558     inst = self.instance
7559
7560     if self.op.os_type is not None:
7561       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7562       inst.os = self.op.os_type
7563       # Write to configuration
7564       self.cfg.Update(inst, feedback_fn)
7565
7566     _StartInstanceDisks(self, inst, None)
7567     try:
7568       feedback_fn("Running the instance OS create scripts...")
7569       # FIXME: pass debug option from opcode to backend
7570       result = self.rpc.call_instance_os_add(inst.primary_node,
7571                                              (inst, self.os_inst), True,
7572                                              self.op.debug_level)
7573       result.Raise("Could not install OS for instance %s on node %s" %
7574                    (inst.name, inst.primary_node))
7575     finally:
7576       _ShutdownInstanceDisks(self, inst)
7577
7578
7579 class LUInstanceRecreateDisks(LogicalUnit):
7580   """Recreate an instance's missing disks.
7581
7582   """
7583   HPATH = "instance-recreate-disks"
7584   HTYPE = constants.HTYPE_INSTANCE
7585   REQ_BGL = False
7586
7587   _MODIFYABLE = compat.UniqueFrozenset([
7588     constants.IDISK_SIZE,
7589     constants.IDISK_MODE,
7590     ])
7591
7592   # New or changed disk parameters may have different semantics
7593   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7594     constants.IDISK_ADOPT,
7595
7596     # TODO: Implement support changing VG while recreating
7597     constants.IDISK_VG,
7598     constants.IDISK_METAVG,
7599     constants.IDISK_PROVIDER,
7600     ]))
7601
7602   def _RunAllocator(self):
7603     """Run the allocator based on input opcode.
7604
7605     """
7606     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7607
7608     # FIXME
7609     # The allocator should actually run in "relocate" mode, but current
7610     # allocators don't support relocating all the nodes of an instance at
7611     # the same time. As a workaround we use "allocate" mode, but this is
7612     # suboptimal for two reasons:
7613     # - The instance name passed to the allocator is present in the list of
7614     #   existing instances, so there could be a conflict within the
7615     #   internal structures of the allocator. This doesn't happen with the
7616     #   current allocators, but it's a liability.
7617     # - The allocator counts the resources used by the instance twice: once
7618     #   because the instance exists already, and once because it tries to
7619     #   allocate a new instance.
7620     # The allocator could choose some of the nodes on which the instance is
7621     # running, but that's not a problem. If the instance nodes are broken,
7622     # they should be already be marked as drained or offline, and hence
7623     # skipped by the allocator. If instance disks have been lost for other
7624     # reasons, then recreating the disks on the same nodes should be fine.
7625     disk_template = self.instance.disk_template
7626     spindle_use = be_full[constants.BE_SPINDLE_USE]
7627     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7628                                         disk_template=disk_template,
7629                                         tags=list(self.instance.GetTags()),
7630                                         os=self.instance.os,
7631                                         nics=[{}],
7632                                         vcpus=be_full[constants.BE_VCPUS],
7633                                         memory=be_full[constants.BE_MAXMEM],
7634                                         spindle_use=spindle_use,
7635                                         disks=[{constants.IDISK_SIZE: d.size,
7636                                                 constants.IDISK_MODE: d.mode}
7637                                                 for d in self.instance.disks],
7638                                         hypervisor=self.instance.hypervisor,
7639                                         node_whitelist=None)
7640     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7641
7642     ial.Run(self.op.iallocator)
7643
7644     assert req.RequiredNodes() == len(self.instance.all_nodes)
7645
7646     if not ial.success:
7647       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7648                                  " %s" % (self.op.iallocator, ial.info),
7649                                  errors.ECODE_NORES)
7650
7651     self.op.nodes = ial.result
7652     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7653                  self.op.instance_name, self.op.iallocator,
7654                  utils.CommaJoin(ial.result))
7655
7656   def CheckArguments(self):
7657     if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7658       # Normalize and convert deprecated list of disk indices
7659       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7660
7661     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7662     if duplicates:
7663       raise errors.OpPrereqError("Some disks have been specified more than"
7664                                  " once: %s" % utils.CommaJoin(duplicates),
7665                                  errors.ECODE_INVAL)
7666
7667     # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7668     # when neither iallocator nor nodes are specified
7669     if self.op.iallocator or self.op.nodes:
7670       _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7671
7672     for (idx, params) in self.op.disks:
7673       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7674       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7675       if unsupported:
7676         raise errors.OpPrereqError("Parameters for disk %s try to change"
7677                                    " unmodifyable parameter(s): %s" %
7678                                    (idx, utils.CommaJoin(unsupported)),
7679                                    errors.ECODE_INVAL)
7680
7681   def ExpandNames(self):
7682     self._ExpandAndLockInstance()
7683     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7684
7685     if self.op.nodes:
7686       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7687       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7688     else:
7689       self.needed_locks[locking.LEVEL_NODE] = []
7690       if self.op.iallocator:
7691         # iallocator will select a new node in the same group
7692         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7693         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7694
7695     self.needed_locks[locking.LEVEL_NODE_RES] = []
7696
7697   def DeclareLocks(self, level):
7698     if level == locking.LEVEL_NODEGROUP:
7699       assert self.op.iallocator is not None
7700       assert not self.op.nodes
7701       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7702       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7703       # Lock the primary group used by the instance optimistically; this
7704       # requires going via the node before it's locked, requiring
7705       # verification later on
7706       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7707         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7708
7709     elif level == locking.LEVEL_NODE:
7710       # If an allocator is used, then we lock all the nodes in the current
7711       # instance group, as we don't know yet which ones will be selected;
7712       # if we replace the nodes without using an allocator, locks are
7713       # already declared in ExpandNames; otherwise, we need to lock all the
7714       # instance nodes for disk re-creation
7715       if self.op.iallocator:
7716         assert not self.op.nodes
7717         assert not self.needed_locks[locking.LEVEL_NODE]
7718         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7719
7720         # Lock member nodes of the group of the primary node
7721         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7722           self.needed_locks[locking.LEVEL_NODE].extend(
7723             self.cfg.GetNodeGroup(group_uuid).members)
7724
7725         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7726       elif not self.op.nodes:
7727         self._LockInstancesNodes(primary_only=False)
7728     elif level == locking.LEVEL_NODE_RES:
7729       # Copy node locks
7730       self.needed_locks[locking.LEVEL_NODE_RES] = \
7731         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7732
7733   def BuildHooksEnv(self):
7734     """Build hooks env.
7735
7736     This runs on master, primary and secondary nodes of the instance.
7737
7738     """
7739     return _BuildInstanceHookEnvByObject(self, self.instance)
7740
7741   def BuildHooksNodes(self):
7742     """Build hooks nodes.
7743
7744     """
7745     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7746     return (nl, nl)
7747
7748   def CheckPrereq(self):
7749     """Check prerequisites.
7750
7751     This checks that the instance is in the cluster and is not running.
7752
7753     """
7754     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7755     assert instance is not None, \
7756       "Cannot retrieve locked instance %s" % self.op.instance_name
7757     if self.op.nodes:
7758       if len(self.op.nodes) != len(instance.all_nodes):
7759         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7760                                    " %d replacement nodes were specified" %
7761                                    (instance.name, len(instance.all_nodes),
7762                                     len(self.op.nodes)),
7763                                    errors.ECODE_INVAL)
7764       assert instance.disk_template != constants.DT_DRBD8 or \
7765           len(self.op.nodes) == 2
7766       assert instance.disk_template != constants.DT_PLAIN or \
7767           len(self.op.nodes) == 1
7768       primary_node = self.op.nodes[0]
7769     else:
7770       primary_node = instance.primary_node
7771     if not self.op.iallocator:
7772       _CheckNodeOnline(self, primary_node)
7773
7774     if instance.disk_template == constants.DT_DISKLESS:
7775       raise errors.OpPrereqError("Instance '%s' has no disks" %
7776                                  self.op.instance_name, errors.ECODE_INVAL)
7777
7778     # Verify if node group locks are still correct
7779     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7780     if owned_groups:
7781       # Node group locks are acquired only for the primary node (and only
7782       # when the allocator is used)
7783       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7784                                primary_only=True)
7785
7786     # if we replace nodes *and* the old primary is offline, we don't
7787     # check the instance state
7788     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7789     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7790       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7791                           msg="cannot recreate disks")
7792
7793     if self.op.disks:
7794       self.disks = dict(self.op.disks)
7795     else:
7796       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7797
7798     maxidx = max(self.disks.keys())
7799     if maxidx >= len(instance.disks):
7800       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7801                                  errors.ECODE_INVAL)
7802
7803     if ((self.op.nodes or self.op.iallocator) and
7804         sorted(self.disks.keys()) != range(len(instance.disks))):
7805       raise errors.OpPrereqError("Can't recreate disks partially and"
7806                                  " change the nodes at the same time",
7807                                  errors.ECODE_INVAL)
7808
7809     self.instance = instance
7810
7811     if self.op.iallocator:
7812       self._RunAllocator()
7813       # Release unneeded node and node resource locks
7814       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7815       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7816       _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7817
7818     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7819
7820   def Exec(self, feedback_fn):
7821     """Recreate the disks.
7822
7823     """
7824     instance = self.instance
7825
7826     assert (self.owned_locks(locking.LEVEL_NODE) ==
7827             self.owned_locks(locking.LEVEL_NODE_RES))
7828
7829     to_skip = []
7830     mods = [] # keeps track of needed changes
7831
7832     for idx, disk in enumerate(instance.disks):
7833       try:
7834         changes = self.disks[idx]
7835       except KeyError:
7836         # Disk should not be recreated
7837         to_skip.append(idx)
7838         continue
7839
7840       # update secondaries for disks, if needed
7841       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7842         # need to update the nodes and minors
7843         assert len(self.op.nodes) == 2
7844         assert len(disk.logical_id) == 6 # otherwise disk internals
7845                                          # have changed
7846         (_, _, old_port, _, _, old_secret) = disk.logical_id
7847         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7848         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7849                   new_minors[0], new_minors[1], old_secret)
7850         assert len(disk.logical_id) == len(new_id)
7851       else:
7852         new_id = None
7853
7854       mods.append((idx, new_id, changes))
7855
7856     # now that we have passed all asserts above, we can apply the mods
7857     # in a single run (to avoid partial changes)
7858     for idx, new_id, changes in mods:
7859       disk = instance.disks[idx]
7860       if new_id is not None:
7861         assert disk.dev_type == constants.LD_DRBD8
7862         disk.logical_id = new_id
7863       if changes:
7864         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7865                     mode=changes.get(constants.IDISK_MODE, None))
7866
7867     # change primary node, if needed
7868     if self.op.nodes:
7869       instance.primary_node = self.op.nodes[0]
7870       self.LogWarning("Changing the instance's nodes, you will have to"
7871                       " remove any disks left on the older nodes manually")
7872
7873     if self.op.nodes:
7874       self.cfg.Update(instance, feedback_fn)
7875
7876     # All touched nodes must be locked
7877     mylocks = self.owned_locks(locking.LEVEL_NODE)
7878     assert mylocks.issuperset(frozenset(instance.all_nodes))
7879     _CreateDisks(self, instance, to_skip=to_skip)
7880
7881
7882 class LUInstanceRename(LogicalUnit):
7883   """Rename an instance.
7884
7885   """
7886   HPATH = "instance-rename"
7887   HTYPE = constants.HTYPE_INSTANCE
7888
7889   def CheckArguments(self):
7890     """Check arguments.
7891
7892     """
7893     if self.op.ip_check and not self.op.name_check:
7894       # TODO: make the ip check more flexible and not depend on the name check
7895       raise errors.OpPrereqError("IP address check requires a name check",
7896                                  errors.ECODE_INVAL)
7897
7898   def BuildHooksEnv(self):
7899     """Build hooks env.
7900
7901     This runs on master, primary and secondary nodes of the instance.
7902
7903     """
7904     env = _BuildInstanceHookEnvByObject(self, self.instance)
7905     env["INSTANCE_NEW_NAME"] = self.op.new_name
7906     return env
7907
7908   def BuildHooksNodes(self):
7909     """Build hooks nodes.
7910
7911     """
7912     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7913     return (nl, nl)
7914
7915   def CheckPrereq(self):
7916     """Check prerequisites.
7917
7918     This checks that the instance is in the cluster and is not running.
7919
7920     """
7921     self.op.instance_name = _ExpandInstanceName(self.cfg,
7922                                                 self.op.instance_name)
7923     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7924     assert instance is not None
7925     _CheckNodeOnline(self, instance.primary_node)
7926     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7927                         msg="cannot rename")
7928     self.instance = instance
7929
7930     new_name = self.op.new_name
7931     if self.op.name_check:
7932       hostname = _CheckHostnameSane(self, new_name)
7933       new_name = self.op.new_name = hostname.name
7934       if (self.op.ip_check and
7935           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7936         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7937                                    (hostname.ip, new_name),
7938                                    errors.ECODE_NOTUNIQUE)
7939
7940     instance_list = self.cfg.GetInstanceList()
7941     if new_name in instance_list and new_name != instance.name:
7942       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7943                                  new_name, errors.ECODE_EXISTS)
7944
7945   def Exec(self, feedback_fn):
7946     """Rename the instance.
7947
7948     """
7949     inst = self.instance
7950     old_name = inst.name
7951
7952     rename_file_storage = False
7953     if (inst.disk_template in constants.DTS_FILEBASED and
7954         self.op.new_name != inst.name):
7955       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7956       rename_file_storage = True
7957
7958     self.cfg.RenameInstance(inst.name, self.op.new_name)
7959     # Change the instance lock. This is definitely safe while we hold the BGL.
7960     # Otherwise the new lock would have to be added in acquired mode.
7961     assert self.REQ_BGL
7962     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7963     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7964     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7965
7966     # re-read the instance from the configuration after rename
7967     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7968
7969     if rename_file_storage:
7970       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7971       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7972                                                      old_file_storage_dir,
7973                                                      new_file_storage_dir)
7974       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7975                    " (but the instance has been renamed in Ganeti)" %
7976                    (inst.primary_node, old_file_storage_dir,
7977                     new_file_storage_dir))
7978
7979     _StartInstanceDisks(self, inst, None)
7980     # update info on disks
7981     info = _GetInstanceInfoText(inst)
7982     for (idx, disk) in enumerate(inst.disks):
7983       for node in inst.all_nodes:
7984         self.cfg.SetDiskID(disk, node)
7985         result = self.rpc.call_blockdev_setinfo(node, disk, info)
7986         if result.fail_msg:
7987           self.LogWarning("Error setting info on node %s for disk %s: %s",
7988                           node, idx, result.fail_msg)
7989     try:
7990       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7991                                                  old_name, self.op.debug_level)
7992       msg = result.fail_msg
7993       if msg:
7994         msg = ("Could not run OS rename script for instance %s on node %s"
7995                " (but the instance has been renamed in Ganeti): %s" %
7996                (inst.name, inst.primary_node, msg))
7997         self.LogWarning(msg)
7998     finally:
7999       _ShutdownInstanceDisks(self, inst)
8000
8001     return inst.name
8002
8003
8004 class LUInstanceRemove(LogicalUnit):
8005   """Remove an instance.
8006
8007   """
8008   HPATH = "instance-remove"
8009   HTYPE = constants.HTYPE_INSTANCE
8010   REQ_BGL = False
8011
8012   def ExpandNames(self):
8013     self._ExpandAndLockInstance()
8014     self.needed_locks[locking.LEVEL_NODE] = []
8015     self.needed_locks[locking.LEVEL_NODE_RES] = []
8016     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8017
8018   def DeclareLocks(self, level):
8019     if level == locking.LEVEL_NODE:
8020       self._LockInstancesNodes()
8021     elif level == locking.LEVEL_NODE_RES:
8022       # Copy node locks
8023       self.needed_locks[locking.LEVEL_NODE_RES] = \
8024         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8025
8026   def BuildHooksEnv(self):
8027     """Build hooks env.
8028
8029     This runs on master, primary and secondary nodes of the instance.
8030
8031     """
8032     env = _BuildInstanceHookEnvByObject(self, self.instance)
8033     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8034     return env
8035
8036   def BuildHooksNodes(self):
8037     """Build hooks nodes.
8038
8039     """
8040     nl = [self.cfg.GetMasterNode()]
8041     nl_post = list(self.instance.all_nodes) + nl
8042     return (nl, nl_post)
8043
8044   def CheckPrereq(self):
8045     """Check prerequisites.
8046
8047     This checks that the instance is in the cluster.
8048
8049     """
8050     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8051     assert self.instance is not None, \
8052       "Cannot retrieve locked instance %s" % self.op.instance_name
8053
8054   def Exec(self, feedback_fn):
8055     """Remove the instance.
8056
8057     """
8058     instance = self.instance
8059     logging.info("Shutting down instance %s on node %s",
8060                  instance.name, instance.primary_node)
8061
8062     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8063                                              self.op.shutdown_timeout)
8064     msg = result.fail_msg
8065     if msg:
8066       if self.op.ignore_failures:
8067         feedback_fn("Warning: can't shutdown instance: %s" % msg)
8068       else:
8069         raise errors.OpExecError("Could not shutdown instance %s on"
8070                                  " node %s: %s" %
8071                                  (instance.name, instance.primary_node, msg))
8072
8073     assert (self.owned_locks(locking.LEVEL_NODE) ==
8074             self.owned_locks(locking.LEVEL_NODE_RES))
8075     assert not (set(instance.all_nodes) -
8076                 self.owned_locks(locking.LEVEL_NODE)), \
8077       "Not owning correct locks"
8078
8079     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8080
8081
8082 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8083   """Utility function to remove an instance.
8084
8085   """
8086   logging.info("Removing block devices for instance %s", instance.name)
8087
8088   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8089     if not ignore_failures:
8090       raise errors.OpExecError("Can't remove instance's disks")
8091     feedback_fn("Warning: can't remove instance's disks")
8092
8093   logging.info("Removing instance %s out of cluster config", instance.name)
8094
8095   lu.cfg.RemoveInstance(instance.name)
8096
8097   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8098     "Instance lock removal conflict"
8099
8100   # Remove lock for the instance
8101   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8102
8103
8104 class LUInstanceQuery(NoHooksLU):
8105   """Logical unit for querying instances.
8106
8107   """
8108   # pylint: disable=W0142
8109   REQ_BGL = False
8110
8111   def CheckArguments(self):
8112     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8113                              self.op.output_fields, self.op.use_locking)
8114
8115   def ExpandNames(self):
8116     self.iq.ExpandNames(self)
8117
8118   def DeclareLocks(self, level):
8119     self.iq.DeclareLocks(self, level)
8120
8121   def Exec(self, feedback_fn):
8122     return self.iq.OldStyleQuery(self)
8123
8124
8125 def _ExpandNamesForMigration(lu):
8126   """Expands names for use with L{TLMigrateInstance}.
8127
8128   @type lu: L{LogicalUnit}
8129
8130   """
8131   if lu.op.target_node is not None:
8132     lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8133
8134   lu.needed_locks[locking.LEVEL_NODE] = []
8135   lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8136
8137   lu.needed_locks[locking.LEVEL_NODE_RES] = []
8138   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8139
8140   # The node allocation lock is actually only needed for replicated instances
8141   # (e.g. DRBD8) and if an iallocator is used.
8142   lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8143
8144
8145 def _DeclareLocksForMigration(lu, level):
8146   """Declares locks for L{TLMigrateInstance}.
8147
8148   @type lu: L{LogicalUnit}
8149   @param level: Lock level
8150
8151   """
8152   if level == locking.LEVEL_NODE_ALLOC:
8153     assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8154
8155     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8156
8157     # Node locks are already declared here rather than at LEVEL_NODE as we need
8158     # the instance object anyway to declare the node allocation lock.
8159     if instance.disk_template in constants.DTS_EXT_MIRROR:
8160       if lu.op.target_node is None:
8161         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8162         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8163       else:
8164         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8165                                                lu.op.target_node]
8166       del lu.recalculate_locks[locking.LEVEL_NODE]
8167     else:
8168       lu._LockInstancesNodes() # pylint: disable=W0212
8169
8170   elif level == locking.LEVEL_NODE:
8171     # Node locks are declared together with the node allocation lock
8172     assert (lu.needed_locks[locking.LEVEL_NODE] or
8173             lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8174
8175   elif level == locking.LEVEL_NODE_RES:
8176     # Copy node locks
8177     lu.needed_locks[locking.LEVEL_NODE_RES] = \
8178       _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8179
8180
8181 class LUInstanceFailover(LogicalUnit):
8182   """Failover an instance.
8183
8184   """
8185   HPATH = "instance-failover"
8186   HTYPE = constants.HTYPE_INSTANCE
8187   REQ_BGL = False
8188
8189   def CheckArguments(self):
8190     """Check the arguments.
8191
8192     """
8193     self.iallocator = getattr(self.op, "iallocator", None)
8194     self.target_node = getattr(self.op, "target_node", None)
8195
8196   def ExpandNames(self):
8197     self._ExpandAndLockInstance()
8198     _ExpandNamesForMigration(self)
8199
8200     self._migrater = \
8201       TLMigrateInstance(self, self.op.instance_name, False, True, False,
8202                         self.op.ignore_consistency, True,
8203                         self.op.shutdown_timeout, self.op.ignore_ipolicy)
8204
8205     self.tasklets = [self._migrater]
8206
8207   def DeclareLocks(self, level):
8208     _DeclareLocksForMigration(self, level)
8209
8210   def BuildHooksEnv(self):
8211     """Build hooks env.
8212
8213     This runs on master, primary and secondary nodes of the instance.
8214
8215     """
8216     instance = self._migrater.instance
8217     source_node = instance.primary_node
8218     target_node = self.op.target_node
8219     env = {
8220       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8221       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8222       "OLD_PRIMARY": source_node,
8223       "NEW_PRIMARY": target_node,
8224       }
8225
8226     if instance.disk_template in constants.DTS_INT_MIRROR:
8227       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8228       env["NEW_SECONDARY"] = source_node
8229     else:
8230       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8231
8232     env.update(_BuildInstanceHookEnvByObject(self, instance))
8233
8234     return env
8235
8236   def BuildHooksNodes(self):
8237     """Build hooks nodes.
8238
8239     """
8240     instance = self._migrater.instance
8241     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8242     return (nl, nl + [instance.primary_node])
8243
8244
8245 class LUInstanceMigrate(LogicalUnit):
8246   """Migrate an instance.
8247
8248   This is migration without shutting down, compared to the failover,
8249   which is done with shutdown.
8250
8251   """
8252   HPATH = "instance-migrate"
8253   HTYPE = constants.HTYPE_INSTANCE
8254   REQ_BGL = False
8255
8256   def ExpandNames(self):
8257     self._ExpandAndLockInstance()
8258     _ExpandNamesForMigration(self)
8259
8260     self._migrater = \
8261       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8262                         False, self.op.allow_failover, False,
8263                         self.op.allow_runtime_changes,
8264                         constants.DEFAULT_SHUTDOWN_TIMEOUT,
8265                         self.op.ignore_ipolicy)
8266
8267     self.tasklets = [self._migrater]
8268
8269   def DeclareLocks(self, level):
8270     _DeclareLocksForMigration(self, level)
8271
8272   def BuildHooksEnv(self):
8273     """Build hooks env.
8274
8275     This runs on master, primary and secondary nodes of the instance.
8276
8277     """
8278     instance = self._migrater.instance
8279     source_node = instance.primary_node
8280     target_node = self.op.target_node
8281     env = _BuildInstanceHookEnvByObject(self, instance)
8282     env.update({
8283       "MIGRATE_LIVE": self._migrater.live,
8284       "MIGRATE_CLEANUP": self.op.cleanup,
8285       "OLD_PRIMARY": source_node,
8286       "NEW_PRIMARY": target_node,
8287       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8288       })
8289
8290     if instance.disk_template in constants.DTS_INT_MIRROR:
8291       env["OLD_SECONDARY"] = target_node
8292       env["NEW_SECONDARY"] = source_node
8293     else:
8294       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8295
8296     return env
8297
8298   def BuildHooksNodes(self):
8299     """Build hooks nodes.
8300
8301     """
8302     instance = self._migrater.instance
8303     snodes = list(instance.secondary_nodes)
8304     nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8305     return (nl, nl)
8306
8307
8308 class LUInstanceMove(LogicalUnit):
8309   """Move an instance by data-copying.
8310
8311   """
8312   HPATH = "instance-move"
8313   HTYPE = constants.HTYPE_INSTANCE
8314   REQ_BGL = False
8315
8316   def ExpandNames(self):
8317     self._ExpandAndLockInstance()
8318     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8319     self.op.target_node = target_node
8320     self.needed_locks[locking.LEVEL_NODE] = [target_node]
8321     self.needed_locks[locking.LEVEL_NODE_RES] = []
8322     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8323
8324   def DeclareLocks(self, level):
8325     if level == locking.LEVEL_NODE:
8326       self._LockInstancesNodes(primary_only=True)
8327     elif level == locking.LEVEL_NODE_RES:
8328       # Copy node locks
8329       self.needed_locks[locking.LEVEL_NODE_RES] = \
8330         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8331
8332   def BuildHooksEnv(self):
8333     """Build hooks env.
8334
8335     This runs on master, primary and secondary nodes of the instance.
8336
8337     """
8338     env = {
8339       "TARGET_NODE": self.op.target_node,
8340       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8341       }
8342     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8343     return env
8344
8345   def BuildHooksNodes(self):
8346     """Build hooks nodes.
8347
8348     """
8349     nl = [
8350       self.cfg.GetMasterNode(),
8351       self.instance.primary_node,
8352       self.op.target_node,
8353       ]
8354     return (nl, nl)
8355
8356   def CheckPrereq(self):
8357     """Check prerequisites.
8358
8359     This checks that the instance is in the cluster.
8360
8361     """
8362     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8363     assert self.instance is not None, \
8364       "Cannot retrieve locked instance %s" % self.op.instance_name
8365
8366     node = self.cfg.GetNodeInfo(self.op.target_node)
8367     assert node is not None, \
8368       "Cannot retrieve locked node %s" % self.op.target_node
8369
8370     self.target_node = target_node = node.name
8371
8372     if target_node == instance.primary_node:
8373       raise errors.OpPrereqError("Instance %s is already on the node %s" %
8374                                  (instance.name, target_node),
8375                                  errors.ECODE_STATE)
8376
8377     bep = self.cfg.GetClusterInfo().FillBE(instance)
8378
8379     for idx, dsk in enumerate(instance.disks):
8380       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8381         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8382                                    " cannot copy" % idx, errors.ECODE_STATE)
8383
8384     _CheckNodeOnline(self, target_node)
8385     _CheckNodeNotDrained(self, target_node)
8386     _CheckNodeVmCapable(self, target_node)
8387     cluster = self.cfg.GetClusterInfo()
8388     group_info = self.cfg.GetNodeGroup(node.group)
8389     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8390     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8391                             ignore=self.op.ignore_ipolicy)
8392
8393     if instance.admin_state == constants.ADMINST_UP:
8394       # check memory requirements on the secondary node
8395       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8396                            instance.name, bep[constants.BE_MAXMEM],
8397                            instance.hypervisor)
8398     else:
8399       self.LogInfo("Not checking memory on the secondary node as"
8400                    " instance will not be started")
8401
8402     # check bridge existance
8403     _CheckInstanceBridgesExist(self, instance, node=target_node)
8404
8405   def Exec(self, feedback_fn):
8406     """Move an instance.
8407
8408     The move is done by shutting it down on its present node, copying
8409     the data over (slow) and starting it on the new node.
8410
8411     """
8412     instance = self.instance
8413
8414     source_node = instance.primary_node
8415     target_node = self.target_node
8416
8417     self.LogInfo("Shutting down instance %s on source node %s",
8418                  instance.name, source_node)
8419
8420     assert (self.owned_locks(locking.LEVEL_NODE) ==
8421             self.owned_locks(locking.LEVEL_NODE_RES))
8422
8423     result = self.rpc.call_instance_shutdown(source_node, instance,
8424                                              self.op.shutdown_timeout)
8425     msg = result.fail_msg
8426     if msg:
8427       if self.op.ignore_consistency:
8428         self.LogWarning("Could not shutdown instance %s on node %s."
8429                         " Proceeding anyway. Please make sure node"
8430                         " %s is down. Error details: %s",
8431                         instance.name, source_node, source_node, msg)
8432       else:
8433         raise errors.OpExecError("Could not shutdown instance %s on"
8434                                  " node %s: %s" %
8435                                  (instance.name, source_node, msg))
8436
8437     # create the target disks
8438     try:
8439       _CreateDisks(self, instance, target_node=target_node)
8440     except errors.OpExecError:
8441       self.LogWarning("Device creation failed, reverting...")
8442       try:
8443         _RemoveDisks(self, instance, target_node=target_node)
8444       finally:
8445         self.cfg.ReleaseDRBDMinors(instance.name)
8446         raise
8447
8448     cluster_name = self.cfg.GetClusterInfo().cluster_name
8449
8450     errs = []
8451     # activate, get path, copy the data over
8452     for idx, disk in enumerate(instance.disks):
8453       self.LogInfo("Copying data for disk %d", idx)
8454       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8455                                                instance.name, True, idx)
8456       if result.fail_msg:
8457         self.LogWarning("Can't assemble newly created disk %d: %s",
8458                         idx, result.fail_msg)
8459         errs.append(result.fail_msg)
8460         break
8461       dev_path = result.payload
8462       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8463                                              target_node, dev_path,
8464                                              cluster_name)
8465       if result.fail_msg:
8466         self.LogWarning("Can't copy data over for disk %d: %s",
8467                         idx, result.fail_msg)
8468         errs.append(result.fail_msg)
8469         break
8470
8471     if errs:
8472       self.LogWarning("Some disks failed to copy, aborting")
8473       try:
8474         _RemoveDisks(self, instance, target_node=target_node)
8475       finally:
8476         self.cfg.ReleaseDRBDMinors(instance.name)
8477         raise errors.OpExecError("Errors during disk copy: %s" %
8478                                  (",".join(errs),))
8479
8480     instance.primary_node = target_node
8481     self.cfg.Update(instance, feedback_fn)
8482
8483     self.LogInfo("Removing the disks on the original node")
8484     _RemoveDisks(self, instance, target_node=source_node)
8485
8486     # Only start the instance if it's marked as up
8487     if instance.admin_state == constants.ADMINST_UP:
8488       self.LogInfo("Starting instance %s on node %s",
8489                    instance.name, target_node)
8490
8491       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8492                                            ignore_secondaries=True)
8493       if not disks_ok:
8494         _ShutdownInstanceDisks(self, instance)
8495         raise errors.OpExecError("Can't activate the instance's disks")
8496
8497       result = self.rpc.call_instance_start(target_node,
8498                                             (instance, None, None), False)
8499       msg = result.fail_msg
8500       if msg:
8501         _ShutdownInstanceDisks(self, instance)
8502         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8503                                  (instance.name, target_node, msg))
8504
8505
8506 class LUNodeMigrate(LogicalUnit):
8507   """Migrate all instances from a node.
8508
8509   """
8510   HPATH = "node-migrate"
8511   HTYPE = constants.HTYPE_NODE
8512   REQ_BGL = False
8513
8514   def CheckArguments(self):
8515     pass
8516
8517   def ExpandNames(self):
8518     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8519
8520     self.share_locks = _ShareAll()
8521     self.needed_locks = {
8522       locking.LEVEL_NODE: [self.op.node_name],
8523       }
8524
8525   def BuildHooksEnv(self):
8526     """Build hooks env.
8527
8528     This runs on the master, the primary and all the secondaries.
8529
8530     """
8531     return {
8532       "NODE_NAME": self.op.node_name,
8533       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8534       }
8535
8536   def BuildHooksNodes(self):
8537     """Build hooks nodes.
8538
8539     """
8540     nl = [self.cfg.GetMasterNode()]
8541     return (nl, nl)
8542
8543   def CheckPrereq(self):
8544     pass
8545
8546   def Exec(self, feedback_fn):
8547     # Prepare jobs for migration instances
8548     allow_runtime_changes = self.op.allow_runtime_changes
8549     jobs = [
8550       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8551                                  mode=self.op.mode,
8552                                  live=self.op.live,
8553                                  iallocator=self.op.iallocator,
8554                                  target_node=self.op.target_node,
8555                                  allow_runtime_changes=allow_runtime_changes,
8556                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8557       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8558
8559     # TODO: Run iallocator in this opcode and pass correct placement options to
8560     # OpInstanceMigrate. Since other jobs can modify the cluster between
8561     # running the iallocator and the actual migration, a good consistency model
8562     # will have to be found.
8563
8564     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8565             frozenset([self.op.node_name]))
8566
8567     return ResultWithJobs(jobs)
8568
8569
8570 class TLMigrateInstance(Tasklet):
8571   """Tasklet class for instance migration.
8572
8573   @type live: boolean
8574   @ivar live: whether the migration will be done live or non-live;
8575       this variable is initalized only after CheckPrereq has run
8576   @type cleanup: boolean
8577   @ivar cleanup: Wheater we cleanup from a failed migration
8578   @type iallocator: string
8579   @ivar iallocator: The iallocator used to determine target_node
8580   @type target_node: string
8581   @ivar target_node: If given, the target_node to reallocate the instance to
8582   @type failover: boolean
8583   @ivar failover: Whether operation results in failover or migration
8584   @type fallback: boolean
8585   @ivar fallback: Whether fallback to failover is allowed if migration not
8586                   possible
8587   @type ignore_consistency: boolean
8588   @ivar ignore_consistency: Wheter we should ignore consistency between source
8589                             and target node
8590   @type shutdown_timeout: int
8591   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8592   @type ignore_ipolicy: bool
8593   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8594
8595   """
8596
8597   # Constants
8598   _MIGRATION_POLL_INTERVAL = 1      # seconds
8599   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8600
8601   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8602                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8603                ignore_ipolicy):
8604     """Initializes this class.
8605
8606     """
8607     Tasklet.__init__(self, lu)
8608
8609     # Parameters
8610     self.instance_name = instance_name
8611     self.cleanup = cleanup
8612     self.live = False # will be overridden later
8613     self.failover = failover
8614     self.fallback = fallback
8615     self.ignore_consistency = ignore_consistency
8616     self.shutdown_timeout = shutdown_timeout
8617     self.ignore_ipolicy = ignore_ipolicy
8618     self.allow_runtime_changes = allow_runtime_changes
8619
8620   def CheckPrereq(self):
8621     """Check prerequisites.
8622
8623     This checks that the instance is in the cluster.
8624
8625     """
8626     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8627     instance = self.cfg.GetInstanceInfo(instance_name)
8628     assert instance is not None
8629     self.instance = instance
8630     cluster = self.cfg.GetClusterInfo()
8631
8632     if (not self.cleanup and
8633         not instance.admin_state == constants.ADMINST_UP and
8634         not self.failover and self.fallback):
8635       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8636                       " switching to failover")
8637       self.failover = True
8638
8639     if instance.disk_template not in constants.DTS_MIRRORED:
8640       if self.failover:
8641         text = "failovers"
8642       else:
8643         text = "migrations"
8644       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8645                                  " %s" % (instance.disk_template, text),
8646                                  errors.ECODE_STATE)
8647
8648     if instance.disk_template in constants.DTS_EXT_MIRROR:
8649       assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8650
8651       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8652
8653       if self.lu.op.iallocator:
8654         self._RunAllocator()
8655       else:
8656         # We set set self.target_node as it is required by
8657         # BuildHooksEnv
8658         self.target_node = self.lu.op.target_node
8659
8660       # Check that the target node is correct in terms of instance policy
8661       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8662       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8663       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8664                                                               group_info)
8665       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8666                               ignore=self.ignore_ipolicy)
8667
8668       # self.target_node is already populated, either directly or by the
8669       # iallocator run
8670       target_node = self.target_node
8671       if self.target_node == instance.primary_node:
8672         raise errors.OpPrereqError("Cannot migrate instance %s"
8673                                    " to its primary (%s)" %
8674                                    (instance.name, instance.primary_node),
8675                                    errors.ECODE_STATE)
8676
8677       if len(self.lu.tasklets) == 1:
8678         # It is safe to release locks only when we're the only tasklet
8679         # in the LU
8680         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8681                       keep=[instance.primary_node, self.target_node])
8682         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8683
8684     else:
8685       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8686
8687       secondary_nodes = instance.secondary_nodes
8688       if not secondary_nodes:
8689         raise errors.ConfigurationError("No secondary node but using"
8690                                         " %s disk template" %
8691                                         instance.disk_template)
8692       target_node = secondary_nodes[0]
8693       if self.lu.op.iallocator or (self.lu.op.target_node and
8694                                    self.lu.op.target_node != target_node):
8695         if self.failover:
8696           text = "failed over"
8697         else:
8698           text = "migrated"
8699         raise errors.OpPrereqError("Instances with disk template %s cannot"
8700                                    " be %s to arbitrary nodes"
8701                                    " (neither an iallocator nor a target"
8702                                    " node can be passed)" %
8703                                    (instance.disk_template, text),
8704                                    errors.ECODE_INVAL)
8705       nodeinfo = self.cfg.GetNodeInfo(target_node)
8706       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8707       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8708                                                               group_info)
8709       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8710                               ignore=self.ignore_ipolicy)
8711
8712     i_be = cluster.FillBE(instance)
8713
8714     # check memory requirements on the secondary node
8715     if (not self.cleanup and
8716          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8717       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8718                                                "migrating instance %s" %
8719                                                instance.name,
8720                                                i_be[constants.BE_MINMEM],
8721                                                instance.hypervisor)
8722     else:
8723       self.lu.LogInfo("Not checking memory on the secondary node as"
8724                       " instance will not be started")
8725
8726     # check if failover must be forced instead of migration
8727     if (not self.cleanup and not self.failover and
8728         i_be[constants.BE_ALWAYS_FAILOVER]):
8729       self.lu.LogInfo("Instance configured to always failover; fallback"
8730                       " to failover")
8731       self.failover = True
8732
8733     # check bridge existance
8734     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8735
8736     if not self.cleanup:
8737       _CheckNodeNotDrained(self.lu, target_node)
8738       if not self.failover:
8739         result = self.rpc.call_instance_migratable(instance.primary_node,
8740                                                    instance)
8741         if result.fail_msg and self.fallback:
8742           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8743                           " failover")
8744           self.failover = True
8745         else:
8746           result.Raise("Can't migrate, please use failover",
8747                        prereq=True, ecode=errors.ECODE_STATE)
8748
8749     assert not (self.failover and self.cleanup)
8750
8751     if not self.failover:
8752       if self.lu.op.live is not None and self.lu.op.mode is not None:
8753         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8754                                    " parameters are accepted",
8755                                    errors.ECODE_INVAL)
8756       if self.lu.op.live is not None:
8757         if self.lu.op.live:
8758           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8759         else:
8760           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8761         # reset the 'live' parameter to None so that repeated
8762         # invocations of CheckPrereq do not raise an exception
8763         self.lu.op.live = None
8764       elif self.lu.op.mode is None:
8765         # read the default value from the hypervisor
8766         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8767         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8768
8769       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8770     else:
8771       # Failover is never live
8772       self.live = False
8773
8774     if not (self.failover or self.cleanup):
8775       remote_info = self.rpc.call_instance_info(instance.primary_node,
8776                                                 instance.name,
8777                                                 instance.hypervisor)
8778       remote_info.Raise("Error checking instance on node %s" %
8779                         instance.primary_node)
8780       instance_running = bool(remote_info.payload)
8781       if instance_running:
8782         self.current_mem = int(remote_info.payload["memory"])
8783
8784   def _RunAllocator(self):
8785     """Run the allocator based on input opcode.
8786
8787     """
8788     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8789
8790     # FIXME: add a self.ignore_ipolicy option
8791     req = iallocator.IAReqRelocate(name=self.instance_name,
8792                                    relocate_from=[self.instance.primary_node])
8793     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8794
8795     ial.Run(self.lu.op.iallocator)
8796
8797     if not ial.success:
8798       raise errors.OpPrereqError("Can't compute nodes using"
8799                                  " iallocator '%s': %s" %
8800                                  (self.lu.op.iallocator, ial.info),
8801                                  errors.ECODE_NORES)
8802     self.target_node = ial.result[0]
8803     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8804                     self.instance_name, self.lu.op.iallocator,
8805                     utils.CommaJoin(ial.result))
8806
8807   def _WaitUntilSync(self):
8808     """Poll with custom rpc for disk sync.
8809
8810     This uses our own step-based rpc call.
8811
8812     """
8813     self.feedback_fn("* wait until resync is done")
8814     all_done = False
8815     while not all_done:
8816       all_done = True
8817       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8818                                             self.nodes_ip,
8819                                             (self.instance.disks,
8820                                              self.instance))
8821       min_percent = 100
8822       for node, nres in result.items():
8823         nres.Raise("Cannot resync disks on node %s" % node)
8824         node_done, node_percent = nres.payload
8825         all_done = all_done and node_done
8826         if node_percent is not None:
8827           min_percent = min(min_percent, node_percent)
8828       if not all_done:
8829         if min_percent < 100:
8830           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8831         time.sleep(2)
8832
8833   def _EnsureSecondary(self, node):
8834     """Demote a node to secondary.
8835
8836     """
8837     self.feedback_fn("* switching node %s to secondary mode" % node)
8838
8839     for dev in self.instance.disks:
8840       self.cfg.SetDiskID(dev, node)
8841
8842     result = self.rpc.call_blockdev_close(node, self.instance.name,
8843                                           self.instance.disks)
8844     result.Raise("Cannot change disk to secondary on node %s" % node)
8845
8846   def _GoStandalone(self):
8847     """Disconnect from the network.
8848
8849     """
8850     self.feedback_fn("* changing into standalone mode")
8851     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8852                                                self.instance.disks)
8853     for node, nres in result.items():
8854       nres.Raise("Cannot disconnect disks node %s" % node)
8855
8856   def _GoReconnect(self, multimaster):
8857     """Reconnect to the network.
8858
8859     """
8860     if multimaster:
8861       msg = "dual-master"
8862     else:
8863       msg = "single-master"
8864     self.feedback_fn("* changing disks into %s mode" % msg)
8865     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8866                                            (self.instance.disks, self.instance),
8867                                            self.instance.name, multimaster)
8868     for node, nres in result.items():
8869       nres.Raise("Cannot change disks config on node %s" % node)
8870
8871   def _ExecCleanup(self):
8872     """Try to cleanup after a failed migration.
8873
8874     The cleanup is done by:
8875       - check that the instance is running only on one node
8876         (and update the config if needed)
8877       - change disks on its secondary node to secondary
8878       - wait until disks are fully synchronized
8879       - disconnect from the network
8880       - change disks into single-master mode
8881       - wait again until disks are fully synchronized
8882
8883     """
8884     instance = self.instance
8885     target_node = self.target_node
8886     source_node = self.source_node
8887
8888     # check running on only one node
8889     self.feedback_fn("* checking where the instance actually runs"
8890                      " (if this hangs, the hypervisor might be in"
8891                      " a bad state)")
8892     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8893     for node, result in ins_l.items():
8894       result.Raise("Can't contact node %s" % node)
8895
8896     runningon_source = instance.name in ins_l[source_node].payload
8897     runningon_target = instance.name in ins_l[target_node].payload
8898
8899     if runningon_source and runningon_target:
8900       raise errors.OpExecError("Instance seems to be running on two nodes,"
8901                                " or the hypervisor is confused; you will have"
8902                                " to ensure manually that it runs only on one"
8903                                " and restart this operation")
8904
8905     if not (runningon_source or runningon_target):
8906       raise errors.OpExecError("Instance does not seem to be running at all;"
8907                                " in this case it's safer to repair by"
8908                                " running 'gnt-instance stop' to ensure disk"
8909                                " shutdown, and then restarting it")
8910
8911     if runningon_target:
8912       # the migration has actually succeeded, we need to update the config
8913       self.feedback_fn("* instance running on secondary node (%s),"
8914                        " updating config" % target_node)
8915       instance.primary_node = target_node
8916       self.cfg.Update(instance, self.feedback_fn)
8917       demoted_node = source_node
8918     else:
8919       self.feedback_fn("* instance confirmed to be running on its"
8920                        " primary node (%s)" % source_node)
8921       demoted_node = target_node
8922
8923     if instance.disk_template in constants.DTS_INT_MIRROR:
8924       self._EnsureSecondary(demoted_node)
8925       try:
8926         self._WaitUntilSync()
8927       except errors.OpExecError:
8928         # we ignore here errors, since if the device is standalone, it
8929         # won't be able to sync
8930         pass
8931       self._GoStandalone()
8932       self._GoReconnect(False)
8933       self._WaitUntilSync()
8934
8935     self.feedback_fn("* done")
8936
8937   def _RevertDiskStatus(self):
8938     """Try to revert the disk status after a failed migration.
8939
8940     """
8941     target_node = self.target_node
8942     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8943       return
8944
8945     try:
8946       self._EnsureSecondary(target_node)
8947       self._GoStandalone()
8948       self._GoReconnect(False)
8949       self._WaitUntilSync()
8950     except errors.OpExecError, err:
8951       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8952                          " please try to recover the instance manually;"
8953                          " error '%s'" % str(err))
8954
8955   def _AbortMigration(self):
8956     """Call the hypervisor code to abort a started migration.
8957
8958     """
8959     instance = self.instance
8960     target_node = self.target_node
8961     source_node = self.source_node
8962     migration_info = self.migration_info
8963
8964     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8965                                                                  instance,
8966                                                                  migration_info,
8967                                                                  False)
8968     abort_msg = abort_result.fail_msg
8969     if abort_msg:
8970       logging.error("Aborting migration failed on target node %s: %s",
8971                     target_node, abort_msg)
8972       # Don't raise an exception here, as we stil have to try to revert the
8973       # disk status, even if this step failed.
8974
8975     abort_result = self.rpc.call_instance_finalize_migration_src(
8976       source_node, instance, False, self.live)
8977     abort_msg = abort_result.fail_msg
8978     if abort_msg:
8979       logging.error("Aborting migration failed on source node %s: %s",
8980                     source_node, abort_msg)
8981
8982   def _ExecMigration(self):
8983     """Migrate an instance.
8984
8985     The migrate is done by:
8986       - change the disks into dual-master mode
8987       - wait until disks are fully synchronized again
8988       - migrate the instance
8989       - change disks on the new secondary node (the old primary) to secondary
8990       - wait until disks are fully synchronized
8991       - change disks into single-master mode
8992
8993     """
8994     instance = self.instance
8995     target_node = self.target_node
8996     source_node = self.source_node
8997
8998     # Check for hypervisor version mismatch and warn the user.
8999     nodeinfo = self.rpc.call_node_info([source_node, target_node],
9000                                        None, [self.instance.hypervisor], False)
9001     for ninfo in nodeinfo.values():
9002       ninfo.Raise("Unable to retrieve node information from node '%s'" %
9003                   ninfo.node)
9004     (_, _, (src_info, )) = nodeinfo[source_node].payload
9005     (_, _, (dst_info, )) = nodeinfo[target_node].payload
9006
9007     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9008         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9009       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9010       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9011       if src_version != dst_version:
9012         self.feedback_fn("* warning: hypervisor version mismatch between"
9013                          " source (%s) and target (%s) node" %
9014                          (src_version, dst_version))
9015
9016     self.feedback_fn("* checking disk consistency between source and target")
9017     for (idx, dev) in enumerate(instance.disks):
9018       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9019         raise errors.OpExecError("Disk %s is degraded or not fully"
9020                                  " synchronized on target node,"
9021                                  " aborting migration" % idx)
9022
9023     if self.current_mem > self.tgt_free_mem:
9024       if not self.allow_runtime_changes:
9025         raise errors.OpExecError("Memory ballooning not allowed and not enough"
9026                                  " free memory to fit instance %s on target"
9027                                  " node %s (have %dMB, need %dMB)" %
9028                                  (instance.name, target_node,
9029                                   self.tgt_free_mem, self.current_mem))
9030       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9031       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9032                                                      instance,
9033                                                      self.tgt_free_mem)
9034       rpcres.Raise("Cannot modify instance runtime memory")
9035
9036     # First get the migration information from the remote node
9037     result = self.rpc.call_migration_info(source_node, instance)
9038     msg = result.fail_msg
9039     if msg:
9040       log_err = ("Failed fetching source migration information from %s: %s" %
9041                  (source_node, msg))
9042       logging.error(log_err)
9043       raise errors.OpExecError(log_err)
9044
9045     self.migration_info = migration_info = result.payload
9046
9047     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9048       # Then switch the disks to master/master mode
9049       self._EnsureSecondary(target_node)
9050       self._GoStandalone()
9051       self._GoReconnect(True)
9052       self._WaitUntilSync()
9053
9054     self.feedback_fn("* preparing %s to accept the instance" % target_node)
9055     result = self.rpc.call_accept_instance(target_node,
9056                                            instance,
9057                                            migration_info,
9058                                            self.nodes_ip[target_node])
9059
9060     msg = result.fail_msg
9061     if msg:
9062       logging.error("Instance pre-migration failed, trying to revert"
9063                     " disk status: %s", msg)
9064       self.feedback_fn("Pre-migration failed, aborting")
9065       self._AbortMigration()
9066       self._RevertDiskStatus()
9067       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9068                                (instance.name, msg))
9069
9070     self.feedback_fn("* migrating instance to %s" % target_node)
9071     result = self.rpc.call_instance_migrate(source_node, instance,
9072                                             self.nodes_ip[target_node],
9073                                             self.live)
9074     msg = result.fail_msg
9075     if msg:
9076       logging.error("Instance migration failed, trying to revert"
9077                     " disk status: %s", msg)
9078       self.feedback_fn("Migration failed, aborting")
9079       self._AbortMigration()
9080       self._RevertDiskStatus()
9081       raise errors.OpExecError("Could not migrate instance %s: %s" %
9082                                (instance.name, msg))
9083
9084     self.feedback_fn("* starting memory transfer")
9085     last_feedback = time.time()
9086     while True:
9087       result = self.rpc.call_instance_get_migration_status(source_node,
9088                                                            instance)
9089       msg = result.fail_msg
9090       ms = result.payload   # MigrationStatus instance
9091       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9092         logging.error("Instance migration failed, trying to revert"
9093                       " disk status: %s", msg)
9094         self.feedback_fn("Migration failed, aborting")
9095         self._AbortMigration()
9096         self._RevertDiskStatus()
9097         if not msg:
9098           msg = "hypervisor returned failure"
9099         raise errors.OpExecError("Could not migrate instance %s: %s" %
9100                                  (instance.name, msg))
9101
9102       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9103         self.feedback_fn("* memory transfer complete")
9104         break
9105
9106       if (utils.TimeoutExpired(last_feedback,
9107                                self._MIGRATION_FEEDBACK_INTERVAL) and
9108           ms.transferred_ram is not None):
9109         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9110         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9111         last_feedback = time.time()
9112
9113       time.sleep(self._MIGRATION_POLL_INTERVAL)
9114
9115     result = self.rpc.call_instance_finalize_migration_src(source_node,
9116                                                            instance,
9117                                                            True,
9118                                                            self.live)
9119     msg = result.fail_msg
9120     if msg:
9121       logging.error("Instance migration succeeded, but finalization failed"
9122                     " on the source node: %s", msg)
9123       raise errors.OpExecError("Could not finalize instance migration: %s" %
9124                                msg)
9125
9126     instance.primary_node = target_node
9127
9128     # distribute new instance config to the other nodes
9129     self.cfg.Update(instance, self.feedback_fn)
9130
9131     result = self.rpc.call_instance_finalize_migration_dst(target_node,
9132                                                            instance,
9133                                                            migration_info,
9134                                                            True)
9135     msg = result.fail_msg
9136     if msg:
9137       logging.error("Instance migration succeeded, but finalization failed"
9138                     " on the target node: %s", msg)
9139       raise errors.OpExecError("Could not finalize instance migration: %s" %
9140                                msg)
9141
9142     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9143       self._EnsureSecondary(source_node)
9144       self._WaitUntilSync()
9145       self._GoStandalone()
9146       self._GoReconnect(False)
9147       self._WaitUntilSync()
9148
9149     # If the instance's disk template is `rbd' or `ext' and there was a
9150     # successful migration, unmap the device from the source node.
9151     if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9152       disks = _ExpandCheckDisks(instance, instance.disks)
9153       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9154       for disk in disks:
9155         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9156         msg = result.fail_msg
9157         if msg:
9158           logging.error("Migration was successful, but couldn't unmap the"
9159                         " block device %s on source node %s: %s",
9160                         disk.iv_name, source_node, msg)
9161           logging.error("You need to unmap the device %s manually on %s",
9162                         disk.iv_name, source_node)
9163
9164     self.feedback_fn("* done")
9165
9166   def _ExecFailover(self):
9167     """Failover an instance.
9168
9169     The failover is done by shutting it down on its present node and
9170     starting it on the secondary.
9171
9172     """
9173     instance = self.instance
9174     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9175
9176     source_node = instance.primary_node
9177     target_node = self.target_node
9178
9179     if instance.admin_state == constants.ADMINST_UP:
9180       self.feedback_fn("* checking disk consistency between source and target")
9181       for (idx, dev) in enumerate(instance.disks):
9182         # for drbd, these are drbd over lvm
9183         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9184                                      False):
9185           if primary_node.offline:
9186             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9187                              " target node %s" %
9188                              (primary_node.name, idx, target_node))
9189           elif not self.ignore_consistency:
9190             raise errors.OpExecError("Disk %s is degraded on target node,"
9191                                      " aborting failover" % idx)
9192     else:
9193       self.feedback_fn("* not checking disk consistency as instance is not"
9194                        " running")
9195
9196     self.feedback_fn("* shutting down instance on source node")
9197     logging.info("Shutting down instance %s on node %s",
9198                  instance.name, source_node)
9199
9200     result = self.rpc.call_instance_shutdown(source_node, instance,
9201                                              self.shutdown_timeout)
9202     msg = result.fail_msg
9203     if msg:
9204       if self.ignore_consistency or primary_node.offline:
9205         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9206                            " proceeding anyway; please make sure node"
9207                            " %s is down; error details: %s",
9208                            instance.name, source_node, source_node, msg)
9209       else:
9210         raise errors.OpExecError("Could not shutdown instance %s on"
9211                                  " node %s: %s" %
9212                                  (instance.name, source_node, msg))
9213
9214     self.feedback_fn("* deactivating the instance's disks on source node")
9215     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9216       raise errors.OpExecError("Can't shut down the instance's disks")
9217
9218     instance.primary_node = target_node
9219     # distribute new instance config to the other nodes
9220     self.cfg.Update(instance, self.feedback_fn)
9221
9222     # Only start the instance if it's marked as up
9223     if instance.admin_state == constants.ADMINST_UP:
9224       self.feedback_fn("* activating the instance's disks on target node %s" %
9225                        target_node)
9226       logging.info("Starting instance %s on node %s",
9227                    instance.name, target_node)
9228
9229       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9230                                            ignore_secondaries=True)
9231       if not disks_ok:
9232         _ShutdownInstanceDisks(self.lu, instance)
9233         raise errors.OpExecError("Can't activate the instance's disks")
9234
9235       self.feedback_fn("* starting the instance on the target node %s" %
9236                        target_node)
9237       result = self.rpc.call_instance_start(target_node, (instance, None, None),
9238                                             False)
9239       msg = result.fail_msg
9240       if msg:
9241         _ShutdownInstanceDisks(self.lu, instance)
9242         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9243                                  (instance.name, target_node, msg))
9244
9245   def Exec(self, feedback_fn):
9246     """Perform the migration.
9247
9248     """
9249     self.feedback_fn = feedback_fn
9250     self.source_node = self.instance.primary_node
9251
9252     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9253     if self.instance.disk_template in constants.DTS_INT_MIRROR:
9254       self.target_node = self.instance.secondary_nodes[0]
9255       # Otherwise self.target_node has been populated either
9256       # directly, or through an iallocator.
9257
9258     self.all_nodes = [self.source_node, self.target_node]
9259     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9260                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
9261
9262     if self.failover:
9263       feedback_fn("Failover instance %s" % self.instance.name)
9264       self._ExecFailover()
9265     else:
9266       feedback_fn("Migrating instance %s" % self.instance.name)
9267
9268       if self.cleanup:
9269         return self._ExecCleanup()
9270       else:
9271         return self._ExecMigration()
9272
9273
9274 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9275                     force_open):
9276   """Wrapper around L{_CreateBlockDevInner}.
9277
9278   This method annotates the root device first.
9279
9280   """
9281   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9282   excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9283   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9284                               force_open, excl_stor)
9285
9286
9287 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9288                          info, force_open, excl_stor):
9289   """Create a tree of block devices on a given node.
9290
9291   If this device type has to be created on secondaries, create it and
9292   all its children.
9293
9294   If not, just recurse to children keeping the same 'force' value.
9295
9296   @attention: The device has to be annotated already.
9297
9298   @param lu: the lu on whose behalf we execute
9299   @param node: the node on which to create the device
9300   @type instance: L{objects.Instance}
9301   @param instance: the instance which owns the device
9302   @type device: L{objects.Disk}
9303   @param device: the device to create
9304   @type force_create: boolean
9305   @param force_create: whether to force creation of this device; this
9306       will be change to True whenever we find a device which has
9307       CreateOnSecondary() attribute
9308   @param info: the extra 'metadata' we should attach to the device
9309       (this will be represented as a LVM tag)
9310   @type force_open: boolean
9311   @param force_open: this parameter will be passes to the
9312       L{backend.BlockdevCreate} function where it specifies
9313       whether we run on primary or not, and it affects both
9314       the child assembly and the device own Open() execution
9315   @type excl_stor: boolean
9316   @param excl_stor: Whether exclusive_storage is active for the node
9317
9318   """
9319   if device.CreateOnSecondary():
9320     force_create = True
9321
9322   if device.children:
9323     for child in device.children:
9324       _CreateBlockDevInner(lu, node, instance, child, force_create,
9325                            info, force_open, excl_stor)
9326
9327   if not force_create:
9328     return
9329
9330   _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9331                         excl_stor)
9332
9333
9334 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9335                           excl_stor):
9336   """Create a single block device on a given node.
9337
9338   This will not recurse over children of the device, so they must be
9339   created in advance.
9340
9341   @param lu: the lu on whose behalf we execute
9342   @param node: the node on which to create the device
9343   @type instance: L{objects.Instance}
9344   @param instance: the instance which owns the device
9345   @type device: L{objects.Disk}
9346   @param device: the device to create
9347   @param info: the extra 'metadata' we should attach to the device
9348       (this will be represented as a LVM tag)
9349   @type force_open: boolean
9350   @param force_open: this parameter will be passes to the
9351       L{backend.BlockdevCreate} function where it specifies
9352       whether we run on primary or not, and it affects both
9353       the child assembly and the device own Open() execution
9354   @type excl_stor: boolean
9355   @param excl_stor: Whether exclusive_storage is active for the node
9356
9357   """
9358   lu.cfg.SetDiskID(device, node)
9359   result = lu.rpc.call_blockdev_create(node, device, device.size,
9360                                        instance.name, force_open, info,
9361                                        excl_stor)
9362   result.Raise("Can't create block device %s on"
9363                " node %s for instance %s" % (device, node, instance.name))
9364   if device.physical_id is None:
9365     device.physical_id = result.payload
9366
9367
9368 def _GenerateUniqueNames(lu, exts):
9369   """Generate a suitable LV name.
9370
9371   This will generate a logical volume name for the given instance.
9372
9373   """
9374   results = []
9375   for val in exts:
9376     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9377     results.append("%s%s" % (new_id, val))
9378   return results
9379
9380
9381 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9382                          iv_name, p_minor, s_minor):
9383   """Generate a drbd8 device complete with its children.
9384
9385   """
9386   assert len(vgnames) == len(names) == 2
9387   port = lu.cfg.AllocatePort()
9388   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9389
9390   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9391                           logical_id=(vgnames[0], names[0]),
9392                           params={})
9393   dev_meta = objects.Disk(dev_type=constants.LD_LV,
9394                           size=constants.DRBD_META_SIZE,
9395                           logical_id=(vgnames[1], names[1]),
9396                           params={})
9397   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9398                           logical_id=(primary, secondary, port,
9399                                       p_minor, s_minor,
9400                                       shared_secret),
9401                           children=[dev_data, dev_meta],
9402                           iv_name=iv_name, params={})
9403   return drbd_dev
9404
9405
9406 _DISK_TEMPLATE_NAME_PREFIX = {
9407   constants.DT_PLAIN: "",
9408   constants.DT_RBD: ".rbd",
9409   constants.DT_EXT: ".ext",
9410   }
9411
9412
9413 _DISK_TEMPLATE_DEVICE_TYPE = {
9414   constants.DT_PLAIN: constants.LD_LV,
9415   constants.DT_FILE: constants.LD_FILE,
9416   constants.DT_SHARED_FILE: constants.LD_FILE,
9417   constants.DT_BLOCK: constants.LD_BLOCKDEV,
9418   constants.DT_RBD: constants.LD_RBD,
9419   constants.DT_EXT: constants.LD_EXT,
9420   }
9421
9422
9423 def _GenerateDiskTemplate(
9424   lu, template_name, instance_name, primary_node, secondary_nodes,
9425   disk_info, file_storage_dir, file_driver, base_index,
9426   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9427   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9428   """Generate the entire disk layout for a given template type.
9429
9430   """
9431   vgname = lu.cfg.GetVGName()
9432   disk_count = len(disk_info)
9433   disks = []
9434
9435   if template_name == constants.DT_DISKLESS:
9436     pass
9437   elif template_name == constants.DT_DRBD8:
9438     if len(secondary_nodes) != 1:
9439       raise errors.ProgrammerError("Wrong template configuration")
9440     remote_node = secondary_nodes[0]
9441     minors = lu.cfg.AllocateDRBDMinor(
9442       [primary_node, remote_node] * len(disk_info), instance_name)
9443
9444     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9445                                                        full_disk_params)
9446     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9447
9448     names = []
9449     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9450                                                for i in range(disk_count)]):
9451       names.append(lv_prefix + "_data")
9452       names.append(lv_prefix + "_meta")
9453     for idx, disk in enumerate(disk_info):
9454       disk_index = idx + base_index
9455       data_vg = disk.get(constants.IDISK_VG, vgname)
9456       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9457       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9458                                       disk[constants.IDISK_SIZE],
9459                                       [data_vg, meta_vg],
9460                                       names[idx * 2:idx * 2 + 2],
9461                                       "disk/%d" % disk_index,
9462                                       minors[idx * 2], minors[idx * 2 + 1])
9463       disk_dev.mode = disk[constants.IDISK_MODE]
9464       disks.append(disk_dev)
9465   else:
9466     if secondary_nodes:
9467       raise errors.ProgrammerError("Wrong template configuration")
9468
9469     if template_name == constants.DT_FILE:
9470       _req_file_storage()
9471     elif template_name == constants.DT_SHARED_FILE:
9472       _req_shr_file_storage()
9473
9474     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9475     if name_prefix is None:
9476       names = None
9477     else:
9478       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9479                                         (name_prefix, base_index + i)
9480                                         for i in range(disk_count)])
9481
9482     if template_name == constants.DT_PLAIN:
9483
9484       def logical_id_fn(idx, _, disk):
9485         vg = disk.get(constants.IDISK_VG, vgname)
9486         return (vg, names[idx])
9487
9488     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9489       logical_id_fn = \
9490         lambda _, disk_index, disk: (file_driver,
9491                                      "%s/disk%d" % (file_storage_dir,
9492                                                     disk_index))
9493     elif template_name == constants.DT_BLOCK:
9494       logical_id_fn = \
9495         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9496                                        disk[constants.IDISK_ADOPT])
9497     elif template_name == constants.DT_RBD:
9498       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9499     elif template_name == constants.DT_EXT:
9500       def logical_id_fn(idx, _, disk):
9501         provider = disk.get(constants.IDISK_PROVIDER, None)
9502         if provider is None:
9503           raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9504                                        " not found", constants.DT_EXT,
9505                                        constants.IDISK_PROVIDER)
9506         return (provider, names[idx])
9507     else:
9508       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9509
9510     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9511
9512     for idx, disk in enumerate(disk_info):
9513       params = {}
9514       # Only for the Ext template add disk_info to params
9515       if template_name == constants.DT_EXT:
9516         params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9517         for key in disk:
9518           if key not in constants.IDISK_PARAMS:
9519             params[key] = disk[key]
9520       disk_index = idx + base_index
9521       size = disk[constants.IDISK_SIZE]
9522       feedback_fn("* disk %s, size %s" %
9523                   (disk_index, utils.FormatUnit(size, "h")))
9524       disks.append(objects.Disk(dev_type=dev_type, size=size,
9525                                 logical_id=logical_id_fn(idx, disk_index, disk),
9526                                 iv_name="disk/%d" % disk_index,
9527                                 mode=disk[constants.IDISK_MODE],
9528                                 params=params))
9529
9530   return disks
9531
9532
9533 def _GetInstanceInfoText(instance):
9534   """Compute that text that should be added to the disk's metadata.
9535
9536   """
9537   return "originstname+%s" % instance.name
9538
9539
9540 def _CalcEta(time_taken, written, total_size):
9541   """Calculates the ETA based on size written and total size.
9542
9543   @param time_taken: The time taken so far
9544   @param written: amount written so far
9545   @param total_size: The total size of data to be written
9546   @return: The remaining time in seconds
9547
9548   """
9549   avg_time = time_taken / float(written)
9550   return (total_size - written) * avg_time
9551
9552
9553 def _WipeDisks(lu, instance, disks=None):
9554   """Wipes instance disks.
9555
9556   @type lu: L{LogicalUnit}
9557   @param lu: the logical unit on whose behalf we execute
9558   @type instance: L{objects.Instance}
9559   @param instance: the instance whose disks we should create
9560   @return: the success of the wipe
9561
9562   """
9563   node = instance.primary_node
9564
9565   if disks is None:
9566     disks = [(idx, disk, 0)
9567              for (idx, disk) in enumerate(instance.disks)]
9568
9569   for (_, device, _) in disks:
9570     lu.cfg.SetDiskID(device, node)
9571
9572   logging.info("Pausing synchronization of disks of instance '%s'",
9573                instance.name)
9574   result = lu.rpc.call_blockdev_pause_resume_sync(node,
9575                                                   (map(compat.snd, disks),
9576                                                    instance),
9577                                                   True)
9578   result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9579
9580   for idx, success in enumerate(result.payload):
9581     if not success:
9582       logging.warn("Pausing synchronization of disk %s of instance '%s'"
9583                    " failed", idx, instance.name)
9584
9585   try:
9586     for (idx, device, offset) in disks:
9587       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9588       # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9589       wipe_chunk_size = \
9590         int(min(constants.MAX_WIPE_CHUNK,
9591                 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9592
9593       size = device.size
9594       last_output = 0
9595       start_time = time.time()
9596
9597       if offset == 0:
9598         info_text = ""
9599       else:
9600         info_text = (" (from %s to %s)" %
9601                      (utils.FormatUnit(offset, "h"),
9602                       utils.FormatUnit(size, "h")))
9603
9604       lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9605
9606       logging.info("Wiping disk %d for instance %s on node %s using"
9607                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9608
9609       while offset < size:
9610         wipe_size = min(wipe_chunk_size, size - offset)
9611
9612         logging.debug("Wiping disk %d, offset %s, chunk %s",
9613                       idx, offset, wipe_size)
9614
9615         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9616                                            wipe_size)
9617         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9618                      (idx, offset, wipe_size))
9619
9620         now = time.time()
9621         offset += wipe_size
9622         if now - last_output >= 60:
9623           eta = _CalcEta(now - start_time, offset, size)
9624           lu.LogInfo(" - done: %.1f%% ETA: %s",
9625                      offset / float(size) * 100, utils.FormatSeconds(eta))
9626           last_output = now
9627   finally:
9628     logging.info("Resuming synchronization of disks for instance '%s'",
9629                  instance.name)
9630
9631     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9632                                                     (map(compat.snd, disks),
9633                                                      instance),
9634                                                     False)
9635
9636     if result.fail_msg:
9637       lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9638                     node, result.fail_msg)
9639     else:
9640       for idx, success in enumerate(result.payload):
9641         if not success:
9642           lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9643                         " failed", idx, instance.name)
9644
9645
9646 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9647   """Create all disks for an instance.
9648
9649   This abstracts away some work from AddInstance.
9650
9651   @type lu: L{LogicalUnit}
9652   @param lu: the logical unit on whose behalf we execute
9653   @type instance: L{objects.Instance}
9654   @param instance: the instance whose disks we should create
9655   @type to_skip: list
9656   @param to_skip: list of indices to skip
9657   @type target_node: string
9658   @param target_node: if passed, overrides the target node for creation
9659   @rtype: boolean
9660   @return: the success of the creation
9661
9662   """
9663   info = _GetInstanceInfoText(instance)
9664   if target_node is None:
9665     pnode = instance.primary_node
9666     all_nodes = instance.all_nodes
9667   else:
9668     pnode = target_node
9669     all_nodes = [pnode]
9670
9671   if instance.disk_template in constants.DTS_FILEBASED:
9672     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9673     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9674
9675     result.Raise("Failed to create directory '%s' on"
9676                  " node %s" % (file_storage_dir, pnode))
9677
9678   # Note: this needs to be kept in sync with adding of disks in
9679   # LUInstanceSetParams
9680   for idx, device in enumerate(instance.disks):
9681     if to_skip and idx in to_skip:
9682       continue
9683     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9684     #HARDCODE
9685     for node in all_nodes:
9686       f_create = node == pnode
9687       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9688
9689
9690 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9691   """Remove all disks for an instance.
9692
9693   This abstracts away some work from `AddInstance()` and
9694   `RemoveInstance()`. Note that in case some of the devices couldn't
9695   be removed, the removal will continue with the other ones (compare
9696   with `_CreateDisks()`).
9697
9698   @type lu: L{LogicalUnit}
9699   @param lu: the logical unit on whose behalf we execute
9700   @type instance: L{objects.Instance}
9701   @param instance: the instance whose disks we should remove
9702   @type target_node: string
9703   @param target_node: used to override the node on which to remove the disks
9704   @rtype: boolean
9705   @return: the success of the removal
9706
9707   """
9708   logging.info("Removing block devices for instance %s", instance.name)
9709
9710   all_result = True
9711   ports_to_release = set()
9712   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9713   for (idx, device) in enumerate(anno_disks):
9714     if target_node:
9715       edata = [(target_node, device)]
9716     else:
9717       edata = device.ComputeNodeTree(instance.primary_node)
9718     for node, disk in edata:
9719       lu.cfg.SetDiskID(disk, node)
9720       result = lu.rpc.call_blockdev_remove(node, disk)
9721       if result.fail_msg:
9722         lu.LogWarning("Could not remove disk %s on node %s,"
9723                       " continuing anyway: %s", idx, node, result.fail_msg)
9724         if not (result.offline and node != instance.primary_node):
9725           all_result = False
9726
9727     # if this is a DRBD disk, return its port to the pool
9728     if device.dev_type in constants.LDS_DRBD:
9729       ports_to_release.add(device.logical_id[2])
9730
9731   if all_result or ignore_failures:
9732     for port in ports_to_release:
9733       lu.cfg.AddTcpUdpPort(port)
9734
9735   if instance.disk_template in constants.DTS_FILEBASED:
9736     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9737     if target_node:
9738       tgt = target_node
9739     else:
9740       tgt = instance.primary_node
9741     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9742     if result.fail_msg:
9743       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9744                     file_storage_dir, instance.primary_node, result.fail_msg)
9745       all_result = False
9746
9747   return all_result
9748
9749
9750 def _ComputeDiskSizePerVG(disk_template, disks):
9751   """Compute disk size requirements in the volume group
9752
9753   """
9754   def _compute(disks, payload):
9755     """Universal algorithm.
9756
9757     """
9758     vgs = {}
9759     for disk in disks:
9760       vgs[disk[constants.IDISK_VG]] = \
9761         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9762
9763     return vgs
9764
9765   # Required free disk space as a function of disk and swap space
9766   req_size_dict = {
9767     constants.DT_DISKLESS: {},
9768     constants.DT_PLAIN: _compute(disks, 0),
9769     # 128 MB are added for drbd metadata for each disk
9770     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9771     constants.DT_FILE: {},
9772     constants.DT_SHARED_FILE: {},
9773   }
9774
9775   if disk_template not in req_size_dict:
9776     raise errors.ProgrammerError("Disk template '%s' size requirement"
9777                                  " is unknown" % disk_template)
9778
9779   return req_size_dict[disk_template]
9780
9781
9782 def _FilterVmNodes(lu, nodenames):
9783   """Filters out non-vm_capable nodes from a list.
9784
9785   @type lu: L{LogicalUnit}
9786   @param lu: the logical unit for which we check
9787   @type nodenames: list
9788   @param nodenames: the list of nodes on which we should check
9789   @rtype: list
9790   @return: the list of vm-capable nodes
9791
9792   """
9793   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9794   return [name for name in nodenames if name not in vm_nodes]
9795
9796
9797 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9798   """Hypervisor parameter validation.
9799
9800   This function abstract the hypervisor parameter validation to be
9801   used in both instance create and instance modify.
9802
9803   @type lu: L{LogicalUnit}
9804   @param lu: the logical unit for which we check
9805   @type nodenames: list
9806   @param nodenames: the list of nodes on which we should check
9807   @type hvname: string
9808   @param hvname: the name of the hypervisor we should use
9809   @type hvparams: dict
9810   @param hvparams: the parameters which we need to check
9811   @raise errors.OpPrereqError: if the parameters are not valid
9812
9813   """
9814   nodenames = _FilterVmNodes(lu, nodenames)
9815
9816   cluster = lu.cfg.GetClusterInfo()
9817   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9818
9819   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9820   for node in nodenames:
9821     info = hvinfo[node]
9822     if info.offline:
9823       continue
9824     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9825
9826
9827 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9828   """OS parameters validation.
9829
9830   @type lu: L{LogicalUnit}
9831   @param lu: the logical unit for which we check
9832   @type required: boolean
9833   @param required: whether the validation should fail if the OS is not
9834       found
9835   @type nodenames: list
9836   @param nodenames: the list of nodes on which we should check
9837   @type osname: string
9838   @param osname: the name of the hypervisor we should use
9839   @type osparams: dict
9840   @param osparams: the parameters which we need to check
9841   @raise errors.OpPrereqError: if the parameters are not valid
9842
9843   """
9844   nodenames = _FilterVmNodes(lu, nodenames)
9845   result = lu.rpc.call_os_validate(nodenames, required, osname,
9846                                    [constants.OS_VALIDATE_PARAMETERS],
9847                                    osparams)
9848   for node, nres in result.items():
9849     # we don't check for offline cases since this should be run only
9850     # against the master node and/or an instance's nodes
9851     nres.Raise("OS Parameters validation failed on node %s" % node)
9852     if not nres.payload:
9853       lu.LogInfo("OS %s not found on node %s, validation skipped",
9854                  osname, node)
9855
9856
9857 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9858   """Wrapper around IAReqInstanceAlloc.
9859
9860   @param op: The instance opcode
9861   @param disks: The computed disks
9862   @param nics: The computed nics
9863   @param beparams: The full filled beparams
9864   @param node_whitelist: List of nodes which should appear as online to the
9865     allocator (unless the node is already marked offline)
9866
9867   @returns: A filled L{iallocator.IAReqInstanceAlloc}
9868
9869   """
9870   spindle_use = beparams[constants.BE_SPINDLE_USE]
9871   return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9872                                        disk_template=op.disk_template,
9873                                        tags=op.tags,
9874                                        os=op.os_type,
9875                                        vcpus=beparams[constants.BE_VCPUS],
9876                                        memory=beparams[constants.BE_MAXMEM],
9877                                        spindle_use=spindle_use,
9878                                        disks=disks,
9879                                        nics=[n.ToDict() for n in nics],
9880                                        hypervisor=op.hypervisor,
9881                                        node_whitelist=node_whitelist)
9882
9883
9884 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9885   """Computes the nics.
9886
9887   @param op: The instance opcode
9888   @param cluster: Cluster configuration object
9889   @param default_ip: The default ip to assign
9890   @param cfg: An instance of the configuration object
9891   @param ec_id: Execution context ID
9892
9893   @returns: The build up nics
9894
9895   """
9896   nics = []
9897   for nic in op.nics:
9898     nic_mode_req = nic.get(constants.INIC_MODE, None)
9899     nic_mode = nic_mode_req
9900     if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9901       nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9902
9903     net = nic.get(constants.INIC_NETWORK, None)
9904     link = nic.get(constants.NIC_LINK, None)
9905     ip = nic.get(constants.INIC_IP, None)
9906
9907     if net is None or net.lower() == constants.VALUE_NONE:
9908       net = None
9909     else:
9910       if nic_mode_req is not None or link is not None:
9911         raise errors.OpPrereqError("If network is given, no mode or link"
9912                                    " is allowed to be passed",
9913                                    errors.ECODE_INVAL)
9914
9915     # ip validity checks
9916     if ip is None or ip.lower() == constants.VALUE_NONE:
9917       nic_ip = None
9918     elif ip.lower() == constants.VALUE_AUTO:
9919       if not op.name_check:
9920         raise errors.OpPrereqError("IP address set to auto but name checks"
9921                                    " have been skipped",
9922                                    errors.ECODE_INVAL)
9923       nic_ip = default_ip
9924     else:
9925       # We defer pool operations until later, so that the iallocator has
9926       # filled in the instance's node(s) dimara
9927       if ip.lower() == constants.NIC_IP_POOL:
9928         if net is None:
9929           raise errors.OpPrereqError("if ip=pool, parameter network"
9930                                      " must be passed too",
9931                                      errors.ECODE_INVAL)
9932
9933       elif not netutils.IPAddress.IsValid(ip):
9934         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9935                                    errors.ECODE_INVAL)
9936
9937       nic_ip = ip
9938
9939     # TODO: check the ip address for uniqueness
9940     if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9941       raise errors.OpPrereqError("Routed nic mode requires an ip address",
9942                                  errors.ECODE_INVAL)
9943
9944     # MAC address verification
9945     mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9946     if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9947       mac = utils.NormalizeAndValidateMac(mac)
9948
9949       try:
9950         # TODO: We need to factor this out
9951         cfg.ReserveMAC(mac, ec_id)
9952       except errors.ReservationError:
9953         raise errors.OpPrereqError("MAC address %s already in use"
9954                                    " in cluster" % mac,
9955                                    errors.ECODE_NOTUNIQUE)
9956
9957     #  Build nic parameters
9958     nicparams = {}
9959     if nic_mode_req:
9960       nicparams[constants.NIC_MODE] = nic_mode
9961     if link:
9962       nicparams[constants.NIC_LINK] = link
9963
9964     check_params = cluster.SimpleFillNIC(nicparams)
9965     objects.NIC.CheckParameterSyntax(check_params)
9966     net_uuid = cfg.LookupNetwork(net)
9967     nics.append(objects.NIC(mac=mac, ip=nic_ip,
9968                             network=net_uuid, nicparams=nicparams))
9969
9970   return nics
9971
9972
9973 def _ComputeDisks(op, default_vg):
9974   """Computes the instance disks.
9975
9976   @param op: The instance opcode
9977   @param default_vg: The default_vg to assume
9978
9979   @return: The computed disks
9980
9981   """
9982   disks = []
9983   for disk in op.disks:
9984     mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985     if mode not in constants.DISK_ACCESS_SET:
9986       raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987                                  mode, errors.ECODE_INVAL)
9988     size = disk.get(constants.IDISK_SIZE, None)
9989     if size is None:
9990       raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9991     try:
9992       size = int(size)
9993     except (TypeError, ValueError):
9994       raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9995                                  errors.ECODE_INVAL)
9996
9997     ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998     if ext_provider and op.disk_template != constants.DT_EXT:
9999       raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000                                  " disk template, not %s" %
10001                                  (constants.IDISK_PROVIDER, constants.DT_EXT,
10002                                  op.disk_template), errors.ECODE_INVAL)
10003
10004     data_vg = disk.get(constants.IDISK_VG, default_vg)
10005     new_disk = {
10006       constants.IDISK_SIZE: size,
10007       constants.IDISK_MODE: mode,
10008       constants.IDISK_VG: data_vg,
10009       }
10010
10011     if constants.IDISK_METAVG in disk:
10012       new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013     if constants.IDISK_ADOPT in disk:
10014       new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10015
10016     # For extstorage, demand the `provider' option and add any
10017     # additional parameters (ext-params) to the dict
10018     if op.disk_template == constants.DT_EXT:
10019       if ext_provider:
10020         new_disk[constants.IDISK_PROVIDER] = ext_provider
10021         for key in disk:
10022           if key not in constants.IDISK_PARAMS:
10023             new_disk[key] = disk[key]
10024       else:
10025         raise errors.OpPrereqError("Missing provider for template '%s'" %
10026                                    constants.DT_EXT, errors.ECODE_INVAL)
10027
10028     disks.append(new_disk)
10029
10030   return disks
10031
10032
10033 def _ComputeFullBeParams(op, cluster):
10034   """Computes the full beparams.
10035
10036   @param op: The instance opcode
10037   @param cluster: The cluster config object
10038
10039   @return: The fully filled beparams
10040
10041   """
10042   default_beparams = cluster.beparams[constants.PP_DEFAULT]
10043   for param, value in op.beparams.iteritems():
10044     if value == constants.VALUE_AUTO:
10045       op.beparams[param] = default_beparams[param]
10046   objects.UpgradeBeParams(op.beparams)
10047   utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10048   return cluster.SimpleFillBE(op.beparams)
10049
10050
10051 def _CheckOpportunisticLocking(op):
10052   """Generate error if opportunistic locking is not possible.
10053
10054   """
10055   if op.opportunistic_locking and not op.iallocator:
10056     raise errors.OpPrereqError("Opportunistic locking is only available in"
10057                                " combination with an instance allocator",
10058                                errors.ECODE_INVAL)
10059
10060
10061 class LUInstanceCreate(LogicalUnit):
10062   """Create an instance.
10063
10064   """
10065   HPATH = "instance-add"
10066   HTYPE = constants.HTYPE_INSTANCE
10067   REQ_BGL = False
10068
10069   def CheckArguments(self):
10070     """Check arguments.
10071
10072     """
10073     # do not require name_check to ease forward/backward compatibility
10074     # for tools
10075     if self.op.no_install and self.op.start:
10076       self.LogInfo("No-installation mode selected, disabling startup")
10077       self.op.start = False
10078     # validate/normalize the instance name
10079     self.op.instance_name = \
10080       netutils.Hostname.GetNormalizedName(self.op.instance_name)
10081
10082     if self.op.ip_check and not self.op.name_check:
10083       # TODO: make the ip check more flexible and not depend on the name check
10084       raise errors.OpPrereqError("Cannot do IP address check without a name"
10085                                  " check", errors.ECODE_INVAL)
10086
10087     # check nics' parameter names
10088     for nic in self.op.nics:
10089       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10090
10091     # check disks. parameter names and consistent adopt/no-adopt strategy
10092     has_adopt = has_no_adopt = False
10093     for disk in self.op.disks:
10094       if self.op.disk_template != constants.DT_EXT:
10095         utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10096       if constants.IDISK_ADOPT in disk:
10097         has_adopt = True
10098       else:
10099         has_no_adopt = True
10100     if has_adopt and has_no_adopt:
10101       raise errors.OpPrereqError("Either all disks are adopted or none is",
10102                                  errors.ECODE_INVAL)
10103     if has_adopt:
10104       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10105         raise errors.OpPrereqError("Disk adoption is not supported for the"
10106                                    " '%s' disk template" %
10107                                    self.op.disk_template,
10108                                    errors.ECODE_INVAL)
10109       if self.op.iallocator is not None:
10110         raise errors.OpPrereqError("Disk adoption not allowed with an"
10111                                    " iallocator script", errors.ECODE_INVAL)
10112       if self.op.mode == constants.INSTANCE_IMPORT:
10113         raise errors.OpPrereqError("Disk adoption not allowed for"
10114                                    " instance import", errors.ECODE_INVAL)
10115     else:
10116       if self.op.disk_template in constants.DTS_MUST_ADOPT:
10117         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10118                                    " but no 'adopt' parameter given" %
10119                                    self.op.disk_template,
10120                                    errors.ECODE_INVAL)
10121
10122     self.adopt_disks = has_adopt
10123
10124     # instance name verification
10125     if self.op.name_check:
10126       self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10127       self.op.instance_name = self.hostname1.name
10128       # used in CheckPrereq for ip ping check
10129       self.check_ip = self.hostname1.ip
10130     else:
10131       self.check_ip = None
10132
10133     # file storage checks
10134     if (self.op.file_driver and
10135         not self.op.file_driver in constants.FILE_DRIVER):
10136       raise errors.OpPrereqError("Invalid file driver name '%s'" %
10137                                  self.op.file_driver, errors.ECODE_INVAL)
10138
10139     if self.op.disk_template == constants.DT_FILE:
10140       opcodes.RequireFileStorage()
10141     elif self.op.disk_template == constants.DT_SHARED_FILE:
10142       opcodes.RequireSharedFileStorage()
10143
10144     ### Node/iallocator related checks
10145     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10146
10147     if self.op.pnode is not None:
10148       if self.op.disk_template in constants.DTS_INT_MIRROR:
10149         if self.op.snode is None:
10150           raise errors.OpPrereqError("The networked disk templates need"
10151                                      " a mirror node", errors.ECODE_INVAL)
10152       elif self.op.snode:
10153         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10154                         " template")
10155         self.op.snode = None
10156
10157     _CheckOpportunisticLocking(self.op)
10158
10159     self._cds = _GetClusterDomainSecret()
10160
10161     if self.op.mode == constants.INSTANCE_IMPORT:
10162       # On import force_variant must be True, because if we forced it at
10163       # initial install, our only chance when importing it back is that it
10164       # works again!
10165       self.op.force_variant = True
10166
10167       if self.op.no_install:
10168         self.LogInfo("No-installation mode has no effect during import")
10169
10170     elif self.op.mode == constants.INSTANCE_CREATE:
10171       if self.op.os_type is None:
10172         raise errors.OpPrereqError("No guest OS specified",
10173                                    errors.ECODE_INVAL)
10174       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10175         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10176                                    " installation" % self.op.os_type,
10177                                    errors.ECODE_STATE)
10178       if self.op.disk_template is None:
10179         raise errors.OpPrereqError("No disk template specified",
10180                                    errors.ECODE_INVAL)
10181
10182     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10183       # Check handshake to ensure both clusters have the same domain secret
10184       src_handshake = self.op.source_handshake
10185       if not src_handshake:
10186         raise errors.OpPrereqError("Missing source handshake",
10187                                    errors.ECODE_INVAL)
10188
10189       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10190                                                            src_handshake)
10191       if errmsg:
10192         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10193                                    errors.ECODE_INVAL)
10194
10195       # Load and check source CA
10196       self.source_x509_ca_pem = self.op.source_x509_ca
10197       if not self.source_x509_ca_pem:
10198         raise errors.OpPrereqError("Missing source X509 CA",
10199                                    errors.ECODE_INVAL)
10200
10201       try:
10202         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10203                                                     self._cds)
10204       except OpenSSL.crypto.Error, err:
10205         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10206                                    (err, ), errors.ECODE_INVAL)
10207
10208       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10209       if errcode is not None:
10210         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10211                                    errors.ECODE_INVAL)
10212
10213       self.source_x509_ca = cert
10214
10215       src_instance_name = self.op.source_instance_name
10216       if not src_instance_name:
10217         raise errors.OpPrereqError("Missing source instance name",
10218                                    errors.ECODE_INVAL)
10219
10220       self.source_instance_name = \
10221           netutils.GetHostname(name=src_instance_name).name
10222
10223     else:
10224       raise errors.OpPrereqError("Invalid instance creation mode %r" %
10225                                  self.op.mode, errors.ECODE_INVAL)
10226
10227   def ExpandNames(self):
10228     """ExpandNames for CreateInstance.
10229
10230     Figure out the right locks for instance creation.
10231
10232     """
10233     self.needed_locks = {}
10234
10235     instance_name = self.op.instance_name
10236     # this is just a preventive check, but someone might still add this
10237     # instance in the meantime, and creation will fail at lock-add time
10238     if instance_name in self.cfg.GetInstanceList():
10239       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10240                                  instance_name, errors.ECODE_EXISTS)
10241
10242     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10243
10244     if self.op.iallocator:
10245       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10246       # specifying a group on instance creation and then selecting nodes from
10247       # that group
10248       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10249       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10250
10251       if self.op.opportunistic_locking:
10252         self.opportunistic_locks[locking.LEVEL_NODE] = True
10253         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10254     else:
10255       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10256       nodelist = [self.op.pnode]
10257       if self.op.snode is not None:
10258         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10259         nodelist.append(self.op.snode)
10260       self.needed_locks[locking.LEVEL_NODE] = nodelist
10261
10262     # in case of import lock the source node too
10263     if self.op.mode == constants.INSTANCE_IMPORT:
10264       src_node = self.op.src_node
10265       src_path = self.op.src_path
10266
10267       if src_path is None:
10268         self.op.src_path = src_path = self.op.instance_name
10269
10270       if src_node is None:
10271         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10272         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10273         self.op.src_node = None
10274         if os.path.isabs(src_path):
10275           raise errors.OpPrereqError("Importing an instance from a path"
10276                                      " requires a source node option",
10277                                      errors.ECODE_INVAL)
10278       else:
10279         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10280         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10281           self.needed_locks[locking.LEVEL_NODE].append(src_node)
10282         if not os.path.isabs(src_path):
10283           self.op.src_path = src_path = \
10284             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10285
10286     self.needed_locks[locking.LEVEL_NODE_RES] = \
10287       _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10288
10289   def _RunAllocator(self):
10290     """Run the allocator based on input opcode.
10291
10292     """
10293     if self.op.opportunistic_locking:
10294       # Only consider nodes for which a lock is held
10295       node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10296     else:
10297       node_whitelist = None
10298
10299     #TODO Export network to iallocator so that it chooses a pnode
10300     #     in a nodegroup that has the desired network connected to
10301     req = _CreateInstanceAllocRequest(self.op, self.disks,
10302                                       self.nics, self.be_full,
10303                                       node_whitelist)
10304     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10305
10306     ial.Run(self.op.iallocator)
10307
10308     if not ial.success:
10309       # When opportunistic locks are used only a temporary failure is generated
10310       if self.op.opportunistic_locking:
10311         ecode = errors.ECODE_TEMP_NORES
10312       else:
10313         ecode = errors.ECODE_NORES
10314
10315       raise errors.OpPrereqError("Can't compute nodes using"
10316                                  " iallocator '%s': %s" %
10317                                  (self.op.iallocator, ial.info),
10318                                  ecode)
10319
10320     self.op.pnode = ial.result[0]
10321     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10322                  self.op.instance_name, self.op.iallocator,
10323                  utils.CommaJoin(ial.result))
10324
10325     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10326
10327     if req.RequiredNodes() == 2:
10328       self.op.snode = ial.result[1]
10329
10330   def BuildHooksEnv(self):
10331     """Build hooks env.
10332
10333     This runs on master, primary and secondary nodes of the instance.
10334
10335     """
10336     env = {
10337       "ADD_MODE": self.op.mode,
10338       }
10339     if self.op.mode == constants.INSTANCE_IMPORT:
10340       env["SRC_NODE"] = self.op.src_node
10341       env["SRC_PATH"] = self.op.src_path
10342       env["SRC_IMAGES"] = self.src_images
10343
10344     env.update(_BuildInstanceHookEnv(
10345       name=self.op.instance_name,
10346       primary_node=self.op.pnode,
10347       secondary_nodes=self.secondaries,
10348       status=self.op.start,
10349       os_type=self.op.os_type,
10350       minmem=self.be_full[constants.BE_MINMEM],
10351       maxmem=self.be_full[constants.BE_MAXMEM],
10352       vcpus=self.be_full[constants.BE_VCPUS],
10353       nics=_NICListToTuple(self, self.nics),
10354       disk_template=self.op.disk_template,
10355       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10356              for d in self.disks],
10357       bep=self.be_full,
10358       hvp=self.hv_full,
10359       hypervisor_name=self.op.hypervisor,
10360       tags=self.op.tags,
10361     ))
10362
10363     return env
10364
10365   def BuildHooksNodes(self):
10366     """Build hooks nodes.
10367
10368     """
10369     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10370     return nl, nl
10371
10372   def _ReadExportInfo(self):
10373     """Reads the export information from disk.
10374
10375     It will override the opcode source node and path with the actual
10376     information, if these two were not specified before.
10377
10378     @return: the export information
10379
10380     """
10381     assert self.op.mode == constants.INSTANCE_IMPORT
10382
10383     src_node = self.op.src_node
10384     src_path = self.op.src_path
10385
10386     if src_node is None:
10387       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10388       exp_list = self.rpc.call_export_list(locked_nodes)
10389       found = False
10390       for node in exp_list:
10391         if exp_list[node].fail_msg:
10392           continue
10393         if src_path in exp_list[node].payload:
10394           found = True
10395           self.op.src_node = src_node = node
10396           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10397                                                        src_path)
10398           break
10399       if not found:
10400         raise errors.OpPrereqError("No export found for relative path %s" %
10401                                     src_path, errors.ECODE_INVAL)
10402
10403     _CheckNodeOnline(self, src_node)
10404     result = self.rpc.call_export_info(src_node, src_path)
10405     result.Raise("No export or invalid export found in dir %s" % src_path)
10406
10407     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10408     if not export_info.has_section(constants.INISECT_EXP):
10409       raise errors.ProgrammerError("Corrupted export config",
10410                                    errors.ECODE_ENVIRON)
10411
10412     ei_version = export_info.get(constants.INISECT_EXP, "version")
10413     if (int(ei_version) != constants.EXPORT_VERSION):
10414       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10415                                  (ei_version, constants.EXPORT_VERSION),
10416                                  errors.ECODE_ENVIRON)
10417     return export_info
10418
10419   def _ReadExportParams(self, einfo):
10420     """Use export parameters as defaults.
10421
10422     In case the opcode doesn't specify (as in override) some instance
10423     parameters, then try to use them from the export information, if
10424     that declares them.
10425
10426     """
10427     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10428
10429     if self.op.disk_template is None:
10430       if einfo.has_option(constants.INISECT_INS, "disk_template"):
10431         self.op.disk_template = einfo.get(constants.INISECT_INS,
10432                                           "disk_template")
10433         if self.op.disk_template not in constants.DISK_TEMPLATES:
10434           raise errors.OpPrereqError("Disk template specified in configuration"
10435                                      " file is not one of the allowed values:"
10436                                      " %s" %
10437                                      " ".join(constants.DISK_TEMPLATES),
10438                                      errors.ECODE_INVAL)
10439       else:
10440         raise errors.OpPrereqError("No disk template specified and the export"
10441                                    " is missing the disk_template information",
10442                                    errors.ECODE_INVAL)
10443
10444     if not self.op.disks:
10445       disks = []
10446       # TODO: import the disk iv_name too
10447       for idx in range(constants.MAX_DISKS):
10448         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10449           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10450           disks.append({constants.IDISK_SIZE: disk_sz})
10451       self.op.disks = disks
10452       if not disks and self.op.disk_template != constants.DT_DISKLESS:
10453         raise errors.OpPrereqError("No disk info specified and the export"
10454                                    " is missing the disk information",
10455                                    errors.ECODE_INVAL)
10456
10457     if not self.op.nics:
10458       nics = []
10459       for idx in range(constants.MAX_NICS):
10460         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10461           ndict = {}
10462           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10463             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10464             ndict[name] = v
10465           nics.append(ndict)
10466         else:
10467           break
10468       self.op.nics = nics
10469
10470     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10471       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10472
10473     if (self.op.hypervisor is None and
10474         einfo.has_option(constants.INISECT_INS, "hypervisor")):
10475       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10476
10477     if einfo.has_section(constants.INISECT_HYP):
10478       # use the export parameters but do not override the ones
10479       # specified by the user
10480       for name, value in einfo.items(constants.INISECT_HYP):
10481         if name not in self.op.hvparams:
10482           self.op.hvparams[name] = value
10483
10484     if einfo.has_section(constants.INISECT_BEP):
10485       # use the parameters, without overriding
10486       for name, value in einfo.items(constants.INISECT_BEP):
10487         if name not in self.op.beparams:
10488           self.op.beparams[name] = value
10489         # Compatibility for the old "memory" be param
10490         if name == constants.BE_MEMORY:
10491           if constants.BE_MAXMEM not in self.op.beparams:
10492             self.op.beparams[constants.BE_MAXMEM] = value
10493           if constants.BE_MINMEM not in self.op.beparams:
10494             self.op.beparams[constants.BE_MINMEM] = value
10495     else:
10496       # try to read the parameters old style, from the main section
10497       for name in constants.BES_PARAMETERS:
10498         if (name not in self.op.beparams and
10499             einfo.has_option(constants.INISECT_INS, name)):
10500           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10501
10502     if einfo.has_section(constants.INISECT_OSP):
10503       # use the parameters, without overriding
10504       for name, value in einfo.items(constants.INISECT_OSP):
10505         if name not in self.op.osparams:
10506           self.op.osparams[name] = value
10507
10508   def _RevertToDefaults(self, cluster):
10509     """Revert the instance parameters to the default values.
10510
10511     """
10512     # hvparams
10513     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10514     for name in self.op.hvparams.keys():
10515       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10516         del self.op.hvparams[name]
10517     # beparams
10518     be_defs = cluster.SimpleFillBE({})
10519     for name in self.op.beparams.keys():
10520       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10521         del self.op.beparams[name]
10522     # nic params
10523     nic_defs = cluster.SimpleFillNIC({})
10524     for nic in self.op.nics:
10525       for name in constants.NICS_PARAMETERS:
10526         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10527           del nic[name]
10528     # osparams
10529     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10530     for name in self.op.osparams.keys():
10531       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10532         del self.op.osparams[name]
10533
10534   def _CalculateFileStorageDir(self):
10535     """Calculate final instance file storage dir.
10536
10537     """
10538     # file storage dir calculation/check
10539     self.instance_file_storage_dir = None
10540     if self.op.disk_template in constants.DTS_FILEBASED:
10541       # build the full file storage dir path
10542       joinargs = []
10543
10544       if self.op.disk_template == constants.DT_SHARED_FILE:
10545         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10546       else:
10547         get_fsd_fn = self.cfg.GetFileStorageDir
10548
10549       cfg_storagedir = get_fsd_fn()
10550       if not cfg_storagedir:
10551         raise errors.OpPrereqError("Cluster file storage dir not defined",
10552                                    errors.ECODE_STATE)
10553       joinargs.append(cfg_storagedir)
10554
10555       if self.op.file_storage_dir is not None:
10556         joinargs.append(self.op.file_storage_dir)
10557
10558       joinargs.append(self.op.instance_name)
10559
10560       # pylint: disable=W0142
10561       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10562
10563   def CheckPrereq(self): # pylint: disable=R0914
10564     """Check prerequisites.
10565
10566     """
10567     self._CalculateFileStorageDir()
10568
10569     if self.op.mode == constants.INSTANCE_IMPORT:
10570       export_info = self._ReadExportInfo()
10571       self._ReadExportParams(export_info)
10572       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10573     else:
10574       self._old_instance_name = None
10575
10576     if (not self.cfg.GetVGName() and
10577         self.op.disk_template not in constants.DTS_NOT_LVM):
10578       raise errors.OpPrereqError("Cluster does not support lvm-based"
10579                                  " instances", errors.ECODE_STATE)
10580
10581     if (self.op.hypervisor is None or
10582         self.op.hypervisor == constants.VALUE_AUTO):
10583       self.op.hypervisor = self.cfg.GetHypervisorType()
10584
10585     cluster = self.cfg.GetClusterInfo()
10586     enabled_hvs = cluster.enabled_hypervisors
10587     if self.op.hypervisor not in enabled_hvs:
10588       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10589                                  " cluster (%s)" %
10590                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10591                                  errors.ECODE_STATE)
10592
10593     # Check tag validity
10594     for tag in self.op.tags:
10595       objects.TaggableObject.ValidateTag(tag)
10596
10597     # check hypervisor parameter syntax (locally)
10598     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10600                                       self.op.hvparams)
10601     hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602     hv_type.CheckParameterSyntax(filled_hvp)
10603     self.hv_full = filled_hvp
10604     # check that we don't specify global parameters on an instance
10605     _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10606                           "instance", "cluster")
10607
10608     # fill and remember the beparams dict
10609     self.be_full = _ComputeFullBeParams(self.op, cluster)
10610
10611     # build os parameters
10612     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10613
10614     # now that hvp/bep are in final format, let's reset to defaults,
10615     # if told to do so
10616     if self.op.identify_defaults:
10617       self._RevertToDefaults(cluster)
10618
10619     # NIC buildup
10620     self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10621                              self.proc.GetECId())
10622
10623     # disk checks/pre-build
10624     default_vg = self.cfg.GetVGName()
10625     self.disks = _ComputeDisks(self.op, default_vg)
10626
10627     if self.op.mode == constants.INSTANCE_IMPORT:
10628       disk_images = []
10629       for idx in range(len(self.disks)):
10630         option = "disk%d_dump" % idx
10631         if export_info.has_option(constants.INISECT_INS, option):
10632           # FIXME: are the old os-es, disk sizes, etc. useful?
10633           export_name = export_info.get(constants.INISECT_INS, option)
10634           image = utils.PathJoin(self.op.src_path, export_name)
10635           disk_images.append(image)
10636         else:
10637           disk_images.append(False)
10638
10639       self.src_images = disk_images
10640
10641       if self.op.instance_name == self._old_instance_name:
10642         for idx, nic in enumerate(self.nics):
10643           if nic.mac == constants.VALUE_AUTO:
10644             nic_mac_ini = "nic%d_mac" % idx
10645             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10646
10647     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10648
10649     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10650     if self.op.ip_check:
10651       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10652         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10653                                    (self.check_ip, self.op.instance_name),
10654                                    errors.ECODE_NOTUNIQUE)
10655
10656     #### mac address generation
10657     # By generating here the mac address both the allocator and the hooks get
10658     # the real final mac address rather than the 'auto' or 'generate' value.
10659     # There is a race condition between the generation and the instance object
10660     # creation, which means that we know the mac is valid now, but we're not
10661     # sure it will be when we actually add the instance. If things go bad
10662     # adding the instance will abort because of a duplicate mac, and the
10663     # creation job will fail.
10664     for nic in self.nics:
10665       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10666         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10667
10668     #### allocator run
10669
10670     if self.op.iallocator is not None:
10671       self._RunAllocator()
10672
10673     # Release all unneeded node locks
10674     keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10675     _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10676     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10677     _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10678
10679     assert (self.owned_locks(locking.LEVEL_NODE) ==
10680             self.owned_locks(locking.LEVEL_NODE_RES)), \
10681       "Node locks differ from node resource locks"
10682
10683     #### node related checks
10684
10685     # check primary node
10686     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10687     assert self.pnode is not None, \
10688       "Cannot retrieve locked node %s" % self.op.pnode
10689     if pnode.offline:
10690       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10691                                  pnode.name, errors.ECODE_STATE)
10692     if pnode.drained:
10693       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10694                                  pnode.name, errors.ECODE_STATE)
10695     if not pnode.vm_capable:
10696       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10697                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10698
10699     self.secondaries = []
10700
10701     # Fill in any IPs from IP pools. This must happen here, because we need to
10702     # know the nic's primary node, as specified by the iallocator
10703     for idx, nic in enumerate(self.nics):
10704       net_uuid = nic.network
10705       if net_uuid is not None:
10706         nobj = self.cfg.GetNetwork(net_uuid)
10707         netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10708         if netparams is None:
10709           raise errors.OpPrereqError("No netparams found for network"
10710                                      " %s. Propably not connected to"
10711                                      " node's %s nodegroup" %
10712                                      (nobj.name, self.pnode.name),
10713                                      errors.ECODE_INVAL)
10714         self.LogInfo("NIC/%d inherits netparams %s" %
10715                      (idx, netparams.values()))
10716         nic.nicparams = dict(netparams)
10717         if nic.ip is not None:
10718           if nic.ip.lower() == constants.NIC_IP_POOL:
10719             try:
10720               nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10721             except errors.ReservationError:
10722               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10723                                          " from the address pool" % idx,
10724                                          errors.ECODE_STATE)
10725             self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10726           else:
10727             try:
10728               self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10729             except errors.ReservationError:
10730               raise errors.OpPrereqError("IP address %s already in use"
10731                                          " or does not belong to network %s" %
10732                                          (nic.ip, nobj.name),
10733                                          errors.ECODE_NOTUNIQUE)
10734
10735       # net is None, ip None or given
10736       elif self.op.conflicts_check:
10737         _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10738
10739     # mirror node verification
10740     if self.op.disk_template in constants.DTS_INT_MIRROR:
10741       if self.op.snode == pnode.name:
10742         raise errors.OpPrereqError("The secondary node cannot be the"
10743                                    " primary node", errors.ECODE_INVAL)
10744       _CheckNodeOnline(self, self.op.snode)
10745       _CheckNodeNotDrained(self, self.op.snode)
10746       _CheckNodeVmCapable(self, self.op.snode)
10747       self.secondaries.append(self.op.snode)
10748
10749       snode = self.cfg.GetNodeInfo(self.op.snode)
10750       if pnode.group != snode.group:
10751         self.LogWarning("The primary and secondary nodes are in two"
10752                         " different node groups; the disk parameters"
10753                         " from the first disk's node group will be"
10754                         " used")
10755
10756     if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10757       nodes = [pnode]
10758       if self.op.disk_template in constants.DTS_INT_MIRROR:
10759         nodes.append(snode)
10760       has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10761       if compat.any(map(has_es, nodes)):
10762         raise errors.OpPrereqError("Disk template %s not supported with"
10763                                    " exclusive storage" % self.op.disk_template,
10764                                    errors.ECODE_STATE)
10765
10766     nodenames = [pnode.name] + self.secondaries
10767
10768     # Verify instance specs
10769     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10770     ispec = {
10771       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10772       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10773       constants.ISPEC_DISK_COUNT: len(self.disks),
10774       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10775       constants.ISPEC_NIC_COUNT: len(self.nics),
10776       constants.ISPEC_SPINDLE_USE: spindle_use,
10777       }
10778
10779     group_info = self.cfg.GetNodeGroup(pnode.group)
10780     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10781     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10782     if not self.op.ignore_ipolicy and res:
10783       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10784              (pnode.group, group_info.name, utils.CommaJoin(res)))
10785       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10786
10787     if not self.adopt_disks:
10788       if self.op.disk_template == constants.DT_RBD:
10789         # _CheckRADOSFreeSpace() is just a placeholder.
10790         # Any function that checks prerequisites can be placed here.
10791         # Check if there is enough space on the RADOS cluster.
10792         _CheckRADOSFreeSpace()
10793       elif self.op.disk_template == constants.DT_EXT:
10794         # FIXME: Function that checks prereqs if needed
10795         pass
10796       else:
10797         # Check lv size requirements, if not adopting
10798         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10799         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10800
10801     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10802       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10803                                 disk[constants.IDISK_ADOPT])
10804                      for disk in self.disks])
10805       if len(all_lvs) != len(self.disks):
10806         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10807                                    errors.ECODE_INVAL)
10808       for lv_name in all_lvs:
10809         try:
10810           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10811           # to ReserveLV uses the same syntax
10812           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10813         except errors.ReservationError:
10814           raise errors.OpPrereqError("LV named %s used by another instance" %
10815                                      lv_name, errors.ECODE_NOTUNIQUE)
10816
10817       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10818       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10819
10820       node_lvs = self.rpc.call_lv_list([pnode.name],
10821                                        vg_names.payload.keys())[pnode.name]
10822       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10823       node_lvs = node_lvs.payload
10824
10825       delta = all_lvs.difference(node_lvs.keys())
10826       if delta:
10827         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10828                                    utils.CommaJoin(delta),
10829                                    errors.ECODE_INVAL)
10830       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10831       if online_lvs:
10832         raise errors.OpPrereqError("Online logical volumes found, cannot"
10833                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10834                                    errors.ECODE_STATE)
10835       # update the size of disk based on what is found
10836       for dsk in self.disks:
10837         dsk[constants.IDISK_SIZE] = \
10838           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10839                                         dsk[constants.IDISK_ADOPT])][0]))
10840
10841     elif self.op.disk_template == constants.DT_BLOCK:
10842       # Normalize and de-duplicate device paths
10843       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10844                        for disk in self.disks])
10845       if len(all_disks) != len(self.disks):
10846         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10847                                    errors.ECODE_INVAL)
10848       baddisks = [d for d in all_disks
10849                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10850       if baddisks:
10851         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10852                                    " cannot be adopted" %
10853                                    (utils.CommaJoin(baddisks),
10854                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10855                                    errors.ECODE_INVAL)
10856
10857       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10858                                             list(all_disks))[pnode.name]
10859       node_disks.Raise("Cannot get block device information from node %s" %
10860                        pnode.name)
10861       node_disks = node_disks.payload
10862       delta = all_disks.difference(node_disks.keys())
10863       if delta:
10864         raise errors.OpPrereqError("Missing block device(s): %s" %
10865                                    utils.CommaJoin(delta),
10866                                    errors.ECODE_INVAL)
10867       for dsk in self.disks:
10868         dsk[constants.IDISK_SIZE] = \
10869           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10870
10871     # Verify instance specs
10872     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10873     ispec = {
10874       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10875       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10876       constants.ISPEC_DISK_COUNT: len(self.disks),
10877       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10878                                   for disk in self.disks],
10879       constants.ISPEC_NIC_COUNT: len(self.nics),
10880       constants.ISPEC_SPINDLE_USE: spindle_use,
10881       }
10882
10883     group_info = self.cfg.GetNodeGroup(pnode.group)
10884     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10885     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10886     if not self.op.ignore_ipolicy and res:
10887       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10888                                   " policy: %s") % (pnode.group,
10889                                                     utils.CommaJoin(res)),
10890                                   errors.ECODE_INVAL)
10891
10892     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10893
10894     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10895     # check OS parameters (remotely)
10896     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10897
10898     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10899
10900     #TODO: _CheckExtParams (remotely)
10901     # Check parameters for extstorage
10902
10903     # memory check on primary node
10904     #TODO(dynmem): use MINMEM for checking
10905     if self.op.start:
10906       _CheckNodeFreeMemory(self, self.pnode.name,
10907                            "creating instance %s" % self.op.instance_name,
10908                            self.be_full[constants.BE_MAXMEM],
10909                            self.op.hypervisor)
10910
10911     self.dry_run_result = list(nodenames)
10912
10913   def Exec(self, feedback_fn):
10914     """Create and add the instance to the cluster.
10915
10916     """
10917     instance = self.op.instance_name
10918     pnode_name = self.pnode.name
10919
10920     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10921                 self.owned_locks(locking.LEVEL_NODE)), \
10922       "Node locks differ from node resource locks"
10923     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10924
10925     ht_kind = self.op.hypervisor
10926     if ht_kind in constants.HTS_REQ_PORT:
10927       network_port = self.cfg.AllocatePort()
10928     else:
10929       network_port = None
10930
10931     # This is ugly but we got a chicken-egg problem here
10932     # We can only take the group disk parameters, as the instance
10933     # has no disks yet (we are generating them right here).
10934     node = self.cfg.GetNodeInfo(pnode_name)
10935     nodegroup = self.cfg.GetNodeGroup(node.group)
10936     disks = _GenerateDiskTemplate(self,
10937                                   self.op.disk_template,
10938                                   instance, pnode_name,
10939                                   self.secondaries,
10940                                   self.disks,
10941                                   self.instance_file_storage_dir,
10942                                   self.op.file_driver,
10943                                   0,
10944                                   feedback_fn,
10945                                   self.cfg.GetGroupDiskParams(nodegroup))
10946
10947     iobj = objects.Instance(name=instance, os=self.op.os_type,
10948                             primary_node=pnode_name,
10949                             nics=self.nics, disks=disks,
10950                             disk_template=self.op.disk_template,
10951                             admin_state=constants.ADMINST_DOWN,
10952                             network_port=network_port,
10953                             beparams=self.op.beparams,
10954                             hvparams=self.op.hvparams,
10955                             hypervisor=self.op.hypervisor,
10956                             osparams=self.op.osparams,
10957                             )
10958
10959     if self.op.tags:
10960       for tag in self.op.tags:
10961         iobj.AddTag(tag)
10962
10963     if self.adopt_disks:
10964       if self.op.disk_template == constants.DT_PLAIN:
10965         # rename LVs to the newly-generated names; we need to construct
10966         # 'fake' LV disks with the old data, plus the new unique_id
10967         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10968         rename_to = []
10969         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10970           rename_to.append(t_dsk.logical_id)
10971           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10972           self.cfg.SetDiskID(t_dsk, pnode_name)
10973         result = self.rpc.call_blockdev_rename(pnode_name,
10974                                                zip(tmp_disks, rename_to))
10975         result.Raise("Failed to rename adoped LVs")
10976     else:
10977       feedback_fn("* creating instance disks...")
10978       try:
10979         _CreateDisks(self, iobj)
10980       except errors.OpExecError:
10981         self.LogWarning("Device creation failed, reverting...")
10982         try:
10983           _RemoveDisks(self, iobj)
10984         finally:
10985           self.cfg.ReleaseDRBDMinors(instance)
10986           raise
10987
10988     feedback_fn("adding instance %s to cluster config" % instance)
10989
10990     self.cfg.AddInstance(iobj, self.proc.GetECId())
10991
10992     # Declare that we don't want to remove the instance lock anymore, as we've
10993     # added the instance to the config
10994     del self.remove_locks[locking.LEVEL_INSTANCE]
10995
10996     if self.op.mode == constants.INSTANCE_IMPORT:
10997       # Release unused nodes
10998       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10999     else:
11000       # Release all nodes
11001       _ReleaseLocks(self, locking.LEVEL_NODE)
11002
11003     disk_abort = False
11004     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11005       feedback_fn("* wiping instance disks...")
11006       try:
11007         _WipeDisks(self, iobj)
11008       except errors.OpExecError, err:
11009         logging.exception("Wiping disks failed")
11010         self.LogWarning("Wiping instance disks failed (%s)", err)
11011         disk_abort = True
11012
11013     if disk_abort:
11014       # Something is already wrong with the disks, don't do anything else
11015       pass
11016     elif self.op.wait_for_sync:
11017       disk_abort = not _WaitForSync(self, iobj)
11018     elif iobj.disk_template in constants.DTS_INT_MIRROR:
11019       # make sure the disks are not degraded (still sync-ing is ok)
11020       feedback_fn("* checking mirrors status")
11021       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11022     else:
11023       disk_abort = False
11024
11025     if disk_abort:
11026       _RemoveDisks(self, iobj)
11027       self.cfg.RemoveInstance(iobj.name)
11028       # Make sure the instance lock gets removed
11029       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11030       raise errors.OpExecError("There are some degraded disks for"
11031                                " this instance")
11032
11033     # Release all node resource locks
11034     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11035
11036     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11037       # we need to set the disks ID to the primary node, since the
11038       # preceding code might or might have not done it, depending on
11039       # disk template and other options
11040       for disk in iobj.disks:
11041         self.cfg.SetDiskID(disk, pnode_name)
11042       if self.op.mode == constants.INSTANCE_CREATE:
11043         if not self.op.no_install:
11044           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11045                         not self.op.wait_for_sync)
11046           if pause_sync:
11047             feedback_fn("* pausing disk sync to install instance OS")
11048             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11049                                                               (iobj.disks,
11050                                                                iobj), True)
11051             for idx, success in enumerate(result.payload):
11052               if not success:
11053                 logging.warn("pause-sync of instance %s for disk %d failed",
11054                              instance, idx)
11055
11056           feedback_fn("* running the instance OS create scripts...")
11057           # FIXME: pass debug option from opcode to backend
11058           os_add_result = \
11059             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11060                                           self.op.debug_level)
11061           if pause_sync:
11062             feedback_fn("* resuming disk sync")
11063             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11064                                                               (iobj.disks,
11065                                                                iobj), False)
11066             for idx, success in enumerate(result.payload):
11067               if not success:
11068                 logging.warn("resume-sync of instance %s for disk %d failed",
11069                              instance, idx)
11070
11071           os_add_result.Raise("Could not add os for instance %s"
11072                               " on node %s" % (instance, pnode_name))
11073
11074       else:
11075         if self.op.mode == constants.INSTANCE_IMPORT:
11076           feedback_fn("* running the instance OS import scripts...")
11077
11078           transfers = []
11079
11080           for idx, image in enumerate(self.src_images):
11081             if not image:
11082               continue
11083
11084             # FIXME: pass debug option from opcode to backend
11085             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11086                                                constants.IEIO_FILE, (image, ),
11087                                                constants.IEIO_SCRIPT,
11088                                                (iobj.disks[idx], idx),
11089                                                None)
11090             transfers.append(dt)
11091
11092           import_result = \
11093             masterd.instance.TransferInstanceData(self, feedback_fn,
11094                                                   self.op.src_node, pnode_name,
11095                                                   self.pnode.secondary_ip,
11096                                                   iobj, transfers)
11097           if not compat.all(import_result):
11098             self.LogWarning("Some disks for instance %s on node %s were not"
11099                             " imported successfully" % (instance, pnode_name))
11100
11101           rename_from = self._old_instance_name
11102
11103         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11104           feedback_fn("* preparing remote import...")
11105           # The source cluster will stop the instance before attempting to make
11106           # a connection. In some cases stopping an instance can take a long
11107           # time, hence the shutdown timeout is added to the connection
11108           # timeout.
11109           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11110                              self.op.source_shutdown_timeout)
11111           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11112
11113           assert iobj.primary_node == self.pnode.name
11114           disk_results = \
11115             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11116                                           self.source_x509_ca,
11117                                           self._cds, timeouts)
11118           if not compat.all(disk_results):
11119             # TODO: Should the instance still be started, even if some disks
11120             # failed to import (valid for local imports, too)?
11121             self.LogWarning("Some disks for instance %s on node %s were not"
11122                             " imported successfully" % (instance, pnode_name))
11123
11124           rename_from = self.source_instance_name
11125
11126         else:
11127           # also checked in the prereq part
11128           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11129                                        % self.op.mode)
11130
11131         # Run rename script on newly imported instance
11132         assert iobj.name == instance
11133         feedback_fn("Running rename script for %s" % instance)
11134         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11135                                                    rename_from,
11136                                                    self.op.debug_level)
11137         if result.fail_msg:
11138           self.LogWarning("Failed to run rename script for %s on node"
11139                           " %s: %s" % (instance, pnode_name, result.fail_msg))
11140
11141     assert not self.owned_locks(locking.LEVEL_NODE_RES)
11142
11143     if self.op.start:
11144       iobj.admin_state = constants.ADMINST_UP
11145       self.cfg.Update(iobj, feedback_fn)
11146       logging.info("Starting instance %s on node %s", instance, pnode_name)
11147       feedback_fn("* starting instance...")
11148       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11149                                             False)
11150       result.Raise("Could not start instance")
11151
11152     return list(iobj.all_nodes)
11153
11154
11155 class LUInstanceMultiAlloc(NoHooksLU):
11156   """Allocates multiple instances at the same time.
11157
11158   """
11159   REQ_BGL = False
11160
11161   def CheckArguments(self):
11162     """Check arguments.
11163
11164     """
11165     nodes = []
11166     for inst in self.op.instances:
11167       if inst.iallocator is not None:
11168         raise errors.OpPrereqError("iallocator are not allowed to be set on"
11169                                    " instance objects", errors.ECODE_INVAL)
11170       nodes.append(bool(inst.pnode))
11171       if inst.disk_template in constants.DTS_INT_MIRROR:
11172         nodes.append(bool(inst.snode))
11173
11174     has_nodes = compat.any(nodes)
11175     if compat.all(nodes) ^ has_nodes:
11176       raise errors.OpPrereqError("There are instance objects providing"
11177                                  " pnode/snode while others do not",
11178                                  errors.ECODE_INVAL)
11179
11180     if self.op.iallocator is None:
11181       default_iallocator = self.cfg.GetDefaultIAllocator()
11182       if default_iallocator and has_nodes:
11183         self.op.iallocator = default_iallocator
11184       else:
11185         raise errors.OpPrereqError("No iallocator or nodes on the instances"
11186                                    " given and no cluster-wide default"
11187                                    " iallocator found; please specify either"
11188                                    " an iallocator or nodes on the instances"
11189                                    " or set a cluster-wide default iallocator",
11190                                    errors.ECODE_INVAL)
11191
11192     _CheckOpportunisticLocking(self.op)
11193
11194     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11195     if dups:
11196       raise errors.OpPrereqError("There are duplicate instance names: %s" %
11197                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
11198
11199   def ExpandNames(self):
11200     """Calculate the locks.
11201
11202     """
11203     self.share_locks = _ShareAll()
11204     self.needed_locks = {
11205       # iallocator will select nodes and even if no iallocator is used,
11206       # collisions with LUInstanceCreate should be avoided
11207       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11208       }
11209
11210     if self.op.iallocator:
11211       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11212       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11213
11214       if self.op.opportunistic_locking:
11215         self.opportunistic_locks[locking.LEVEL_NODE] = True
11216         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11217     else:
11218       nodeslist = []
11219       for inst in self.op.instances:
11220         inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11221         nodeslist.append(inst.pnode)
11222         if inst.snode is not None:
11223           inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11224           nodeslist.append(inst.snode)
11225
11226       self.needed_locks[locking.LEVEL_NODE] = nodeslist
11227       # Lock resources of instance's primary and secondary nodes (copy to
11228       # prevent accidential modification)
11229       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11230
11231   def CheckPrereq(self):
11232     """Check prerequisite.
11233
11234     """
11235     cluster = self.cfg.GetClusterInfo()
11236     default_vg = self.cfg.GetVGName()
11237     ec_id = self.proc.GetECId()
11238
11239     if self.op.opportunistic_locking:
11240       # Only consider nodes for which a lock is held
11241       node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11242     else:
11243       node_whitelist = None
11244
11245     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11246                                          _ComputeNics(op, cluster, None,
11247                                                       self.cfg, ec_id),
11248                                          _ComputeFullBeParams(op, cluster),
11249                                          node_whitelist)
11250              for op in self.op.instances]
11251
11252     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11253     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11254
11255     ial.Run(self.op.iallocator)
11256
11257     if not ial.success:
11258       raise errors.OpPrereqError("Can't compute nodes using"
11259                                  " iallocator '%s': %s" %
11260                                  (self.op.iallocator, ial.info),
11261                                  errors.ECODE_NORES)
11262
11263     self.ia_result = ial.result
11264
11265     if self.op.dry_run:
11266       self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11267         constants.JOB_IDS_KEY: [],
11268         })
11269
11270   def _ConstructPartialResult(self):
11271     """Contructs the partial result.
11272
11273     """
11274     (allocatable, failed) = self.ia_result
11275     return {
11276       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11277         map(compat.fst, allocatable),
11278       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11279       }
11280
11281   def Exec(self, feedback_fn):
11282     """Executes the opcode.
11283
11284     """
11285     op2inst = dict((op.instance_name, op) for op in self.op.instances)
11286     (allocatable, failed) = self.ia_result
11287
11288     jobs = []
11289     for (name, nodes) in allocatable:
11290       op = op2inst.pop(name)
11291
11292       if len(nodes) > 1:
11293         (op.pnode, op.snode) = nodes
11294       else:
11295         (op.pnode,) = nodes
11296
11297       jobs.append([op])
11298
11299     missing = set(op2inst.keys()) - set(failed)
11300     assert not missing, \
11301       "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11302
11303     return ResultWithJobs(jobs, **self._ConstructPartialResult())
11304
11305
11306 def _CheckRADOSFreeSpace():
11307   """Compute disk size requirements inside the RADOS cluster.
11308
11309   """
11310   # For the RADOS cluster we assume there is always enough space.
11311   pass
11312
11313
11314 class LUInstanceConsole(NoHooksLU):
11315   """Connect to an instance's console.
11316
11317   This is somewhat special in that it returns the command line that
11318   you need to run on the master node in order to connect to the
11319   console.
11320
11321   """
11322   REQ_BGL = False
11323
11324   def ExpandNames(self):
11325     self.share_locks = _ShareAll()
11326     self._ExpandAndLockInstance()
11327
11328   def CheckPrereq(self):
11329     """Check prerequisites.
11330
11331     This checks that the instance is in the cluster.
11332
11333     """
11334     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11335     assert self.instance is not None, \
11336       "Cannot retrieve locked instance %s" % self.op.instance_name
11337     _CheckNodeOnline(self, self.instance.primary_node)
11338
11339   def Exec(self, feedback_fn):
11340     """Connect to the console of an instance
11341
11342     """
11343     instance = self.instance
11344     node = instance.primary_node
11345
11346     node_insts = self.rpc.call_instance_list([node],
11347                                              [instance.hypervisor])[node]
11348     node_insts.Raise("Can't get node information from %s" % node)
11349
11350     if instance.name not in node_insts.payload:
11351       if instance.admin_state == constants.ADMINST_UP:
11352         state = constants.INSTST_ERRORDOWN
11353       elif instance.admin_state == constants.ADMINST_DOWN:
11354         state = constants.INSTST_ADMINDOWN
11355       else:
11356         state = constants.INSTST_ADMINOFFLINE
11357       raise errors.OpExecError("Instance %s is not running (state %s)" %
11358                                (instance.name, state))
11359
11360     logging.debug("Connecting to console of %s on %s", instance.name, node)
11361
11362     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11363
11364
11365 def _GetInstanceConsole(cluster, instance):
11366   """Returns console information for an instance.
11367
11368   @type cluster: L{objects.Cluster}
11369   @type instance: L{objects.Instance}
11370   @rtype: dict
11371
11372   """
11373   hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11374   # beparams and hvparams are passed separately, to avoid editing the
11375   # instance and then saving the defaults in the instance itself.
11376   hvparams = cluster.FillHV(instance)
11377   beparams = cluster.FillBE(instance)
11378   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11379
11380   assert console.instance == instance.name
11381   assert console.Validate()
11382
11383   return console.ToDict()
11384
11385
11386 class LUInstanceReplaceDisks(LogicalUnit):
11387   """Replace the disks of an instance.
11388
11389   """
11390   HPATH = "mirrors-replace"
11391   HTYPE = constants.HTYPE_INSTANCE
11392   REQ_BGL = False
11393
11394   def CheckArguments(self):
11395     """Check arguments.
11396
11397     """
11398     remote_node = self.op.remote_node
11399     ialloc = self.op.iallocator
11400     if self.op.mode == constants.REPLACE_DISK_CHG:
11401       if remote_node is None and ialloc is None:
11402         raise errors.OpPrereqError("When changing the secondary either an"
11403                                    " iallocator script must be used or the"
11404                                    " new node given", errors.ECODE_INVAL)
11405       else:
11406         _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11407
11408     elif remote_node is not None or ialloc is not None:
11409       # Not replacing the secondary
11410       raise errors.OpPrereqError("The iallocator and new node options can"
11411                                  " only be used when changing the"
11412                                  " secondary node", errors.ECODE_INVAL)
11413
11414   def ExpandNames(self):
11415     self._ExpandAndLockInstance()
11416
11417     assert locking.LEVEL_NODE not in self.needed_locks
11418     assert locking.LEVEL_NODE_RES not in self.needed_locks
11419     assert locking.LEVEL_NODEGROUP not in self.needed_locks
11420
11421     assert self.op.iallocator is None or self.op.remote_node is None, \
11422       "Conflicting options"
11423
11424     if self.op.remote_node is not None:
11425       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11426
11427       # Warning: do not remove the locking of the new secondary here
11428       # unless DRBD8.AddChildren is changed to work in parallel;
11429       # currently it doesn't since parallel invocations of
11430       # FindUnusedMinor will conflict
11431       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11432       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11433     else:
11434       self.needed_locks[locking.LEVEL_NODE] = []
11435       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11436
11437       if self.op.iallocator is not None:
11438         # iallocator will select a new node in the same group
11439         self.needed_locks[locking.LEVEL_NODEGROUP] = []
11440         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11441
11442     self.needed_locks[locking.LEVEL_NODE_RES] = []
11443
11444     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11445                                    self.op.iallocator, self.op.remote_node,
11446                                    self.op.disks, self.op.early_release,
11447                                    self.op.ignore_ipolicy)
11448
11449     self.tasklets = [self.replacer]
11450
11451   def DeclareLocks(self, level):
11452     if level == locking.LEVEL_NODEGROUP:
11453       assert self.op.remote_node is None
11454       assert self.op.iallocator is not None
11455       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11456
11457       self.share_locks[locking.LEVEL_NODEGROUP] = 1
11458       # Lock all groups used by instance optimistically; this requires going
11459       # via the node before it's locked, requiring verification later on
11460       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11461         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11462
11463     elif level == locking.LEVEL_NODE:
11464       if self.op.iallocator is not None:
11465         assert self.op.remote_node is None
11466         assert not self.needed_locks[locking.LEVEL_NODE]
11467         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11468
11469         # Lock member nodes of all locked groups
11470         self.needed_locks[locking.LEVEL_NODE] = \
11471             [node_name
11472              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11473              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11474       else:
11475         assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11476
11477         self._LockInstancesNodes()
11478
11479     elif level == locking.LEVEL_NODE_RES:
11480       # Reuse node locks
11481       self.needed_locks[locking.LEVEL_NODE_RES] = \
11482         self.needed_locks[locking.LEVEL_NODE]
11483
11484   def BuildHooksEnv(self):
11485     """Build hooks env.
11486
11487     This runs on the master, the primary and all the secondaries.
11488
11489     """
11490     instance = self.replacer.instance
11491     env = {
11492       "MODE": self.op.mode,
11493       "NEW_SECONDARY": self.op.remote_node,
11494       "OLD_SECONDARY": instance.secondary_nodes[0],
11495       }
11496     env.update(_BuildInstanceHookEnvByObject(self, instance))
11497     return env
11498
11499   def BuildHooksNodes(self):
11500     """Build hooks nodes.
11501
11502     """
11503     instance = self.replacer.instance
11504     nl = [
11505       self.cfg.GetMasterNode(),
11506       instance.primary_node,
11507       ]
11508     if self.op.remote_node is not None:
11509       nl.append(self.op.remote_node)
11510     return nl, nl
11511
11512   def CheckPrereq(self):
11513     """Check prerequisites.
11514
11515     """
11516     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11517             self.op.iallocator is None)
11518
11519     # Verify if node group locks are still correct
11520     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11521     if owned_groups:
11522       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11523
11524     return LogicalUnit.CheckPrereq(self)
11525
11526
11527 class TLReplaceDisks(Tasklet):
11528   """Replaces disks for an instance.
11529
11530   Note: Locking is not within the scope of this class.
11531
11532   """
11533   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11534                disks, early_release, ignore_ipolicy):
11535     """Initializes this class.
11536
11537     """
11538     Tasklet.__init__(self, lu)
11539
11540     # Parameters
11541     self.instance_name = instance_name
11542     self.mode = mode
11543     self.iallocator_name = iallocator_name
11544     self.remote_node = remote_node
11545     self.disks = disks
11546     self.early_release = early_release
11547     self.ignore_ipolicy = ignore_ipolicy
11548
11549     # Runtime data
11550     self.instance = None
11551     self.new_node = None
11552     self.target_node = None
11553     self.other_node = None
11554     self.remote_node_info = None
11555     self.node_secondary_ip = None
11556
11557   @staticmethod
11558   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11559     """Compute a new secondary node using an IAllocator.
11560
11561     """
11562     req = iallocator.IAReqRelocate(name=instance_name,
11563                                    relocate_from=list(relocate_from))
11564     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11565
11566     ial.Run(iallocator_name)
11567
11568     if not ial.success:
11569       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11570                                  " %s" % (iallocator_name, ial.info),
11571                                  errors.ECODE_NORES)
11572
11573     remote_node_name = ial.result[0]
11574
11575     lu.LogInfo("Selected new secondary for instance '%s': %s",
11576                instance_name, remote_node_name)
11577
11578     return remote_node_name
11579
11580   def _FindFaultyDisks(self, node_name):
11581     """Wrapper for L{_FindFaultyInstanceDisks}.
11582
11583     """
11584     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11585                                     node_name, True)
11586
11587   def _CheckDisksActivated(self, instance):
11588     """Checks if the instance disks are activated.
11589
11590     @param instance: The instance to check disks
11591     @return: True if they are activated, False otherwise
11592
11593     """
11594     nodes = instance.all_nodes
11595
11596     for idx, dev in enumerate(instance.disks):
11597       for node in nodes:
11598         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11599         self.cfg.SetDiskID(dev, node)
11600
11601         result = _BlockdevFind(self, node, dev, instance)
11602
11603         if result.offline:
11604           continue
11605         elif result.fail_msg or not result.payload:
11606           return False
11607
11608     return True
11609
11610   def CheckPrereq(self):
11611     """Check prerequisites.
11612
11613     This checks that the instance is in the cluster.
11614
11615     """
11616     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11617     assert instance is not None, \
11618       "Cannot retrieve locked instance %s" % self.instance_name
11619
11620     if instance.disk_template != constants.DT_DRBD8:
11621       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11622                                  " instances", errors.ECODE_INVAL)
11623
11624     if len(instance.secondary_nodes) != 1:
11625       raise errors.OpPrereqError("The instance has a strange layout,"
11626                                  " expected one secondary but found %d" %
11627                                  len(instance.secondary_nodes),
11628                                  errors.ECODE_FAULT)
11629
11630     instance = self.instance
11631     secondary_node = instance.secondary_nodes[0]
11632
11633     if self.iallocator_name is None:
11634       remote_node = self.remote_node
11635     else:
11636       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11637                                        instance.name, instance.secondary_nodes)
11638
11639     if remote_node is None:
11640       self.remote_node_info = None
11641     else:
11642       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11643              "Remote node '%s' is not locked" % remote_node
11644
11645       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11646       assert self.remote_node_info is not None, \
11647         "Cannot retrieve locked node %s" % remote_node
11648
11649     if remote_node == self.instance.primary_node:
11650       raise errors.OpPrereqError("The specified node is the primary node of"
11651                                  " the instance", errors.ECODE_INVAL)
11652
11653     if remote_node == secondary_node:
11654       raise errors.OpPrereqError("The specified node is already the"
11655                                  " secondary node of the instance",
11656                                  errors.ECODE_INVAL)
11657
11658     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11659                                     constants.REPLACE_DISK_CHG):
11660       raise errors.OpPrereqError("Cannot specify disks to be replaced",
11661                                  errors.ECODE_INVAL)
11662
11663     if self.mode == constants.REPLACE_DISK_AUTO:
11664       if not self._CheckDisksActivated(instance):
11665         raise errors.OpPrereqError("Please run activate-disks on instance %s"
11666                                    " first" % self.instance_name,
11667                                    errors.ECODE_STATE)
11668       faulty_primary = self._FindFaultyDisks(instance.primary_node)
11669       faulty_secondary = self._FindFaultyDisks(secondary_node)
11670
11671       if faulty_primary and faulty_secondary:
11672         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11673                                    " one node and can not be repaired"
11674                                    " automatically" % self.instance_name,
11675                                    errors.ECODE_STATE)
11676
11677       if faulty_primary:
11678         self.disks = faulty_primary
11679         self.target_node = instance.primary_node
11680         self.other_node = secondary_node
11681         check_nodes = [self.target_node, self.other_node]
11682       elif faulty_secondary:
11683         self.disks = faulty_secondary
11684         self.target_node = secondary_node
11685         self.other_node = instance.primary_node
11686         check_nodes = [self.target_node, self.other_node]
11687       else:
11688         self.disks = []
11689         check_nodes = []
11690
11691     else:
11692       # Non-automatic modes
11693       if self.mode == constants.REPLACE_DISK_PRI:
11694         self.target_node = instance.primary_node
11695         self.other_node = secondary_node
11696         check_nodes = [self.target_node, self.other_node]
11697
11698       elif self.mode == constants.REPLACE_DISK_SEC:
11699         self.target_node = secondary_node
11700         self.other_node = instance.primary_node
11701         check_nodes = [self.target_node, self.other_node]
11702
11703       elif self.mode == constants.REPLACE_DISK_CHG:
11704         self.new_node = remote_node
11705         self.other_node = instance.primary_node
11706         self.target_node = secondary_node
11707         check_nodes = [self.new_node, self.other_node]
11708
11709         _CheckNodeNotDrained(self.lu, remote_node)
11710         _CheckNodeVmCapable(self.lu, remote_node)
11711
11712         old_node_info = self.cfg.GetNodeInfo(secondary_node)
11713         assert old_node_info is not None
11714         if old_node_info.offline and not self.early_release:
11715           # doesn't make sense to delay the release
11716           self.early_release = True
11717           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11718                           " early-release mode", secondary_node)
11719
11720       else:
11721         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11722                                      self.mode)
11723
11724       # If not specified all disks should be replaced
11725       if not self.disks:
11726         self.disks = range(len(self.instance.disks))
11727
11728     # TODO: This is ugly, but right now we can't distinguish between internal
11729     # submitted opcode and external one. We should fix that.
11730     if self.remote_node_info:
11731       # We change the node, lets verify it still meets instance policy
11732       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11733       cluster = self.cfg.GetClusterInfo()
11734       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11735                                                               new_group_info)
11736       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11737                               ignore=self.ignore_ipolicy)
11738
11739     for node in check_nodes:
11740       _CheckNodeOnline(self.lu, node)
11741
11742     touched_nodes = frozenset(node_name for node_name in [self.new_node,
11743                                                           self.other_node,
11744                                                           self.target_node]
11745                               if node_name is not None)
11746
11747     # Release unneeded node and node resource locks
11748     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11749     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11750     _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11751
11752     # Release any owned node group
11753     _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11754
11755     # Check whether disks are valid
11756     for disk_idx in self.disks:
11757       instance.FindDisk(disk_idx)
11758
11759     # Get secondary node IP addresses
11760     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11761                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
11762
11763   def Exec(self, feedback_fn):
11764     """Execute disk replacement.
11765
11766     This dispatches the disk replacement to the appropriate handler.
11767
11768     """
11769     if __debug__:
11770       # Verify owned locks before starting operation
11771       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11772       assert set(owned_nodes) == set(self.node_secondary_ip), \
11773           ("Incorrect node locks, owning %s, expected %s" %
11774            (owned_nodes, self.node_secondary_ip.keys()))
11775       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11776               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11777       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11778
11779       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11780       assert list(owned_instances) == [self.instance_name], \
11781           "Instance '%s' not locked" % self.instance_name
11782
11783       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11784           "Should not own any node group lock at this point"
11785
11786     if not self.disks:
11787       feedback_fn("No disks need replacement for instance '%s'" %
11788                   self.instance.name)
11789       return
11790
11791     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11792                 (utils.CommaJoin(self.disks), self.instance.name))
11793     feedback_fn("Current primary node: %s" % self.instance.primary_node)
11794     feedback_fn("Current seconary node: %s" %
11795                 utils.CommaJoin(self.instance.secondary_nodes))
11796
11797     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11798
11799     # Activate the instance disks if we're replacing them on a down instance
11800     if activate_disks:
11801       _StartInstanceDisks(self.lu, self.instance, True)
11802
11803     try:
11804       # Should we replace the secondary node?
11805       if self.new_node is not None:
11806         fn = self._ExecDrbd8Secondary
11807       else:
11808         fn = self._ExecDrbd8DiskOnly
11809
11810       result = fn(feedback_fn)
11811     finally:
11812       # Deactivate the instance disks if we're replacing them on a
11813       # down instance
11814       if activate_disks:
11815         _SafeShutdownInstanceDisks(self.lu, self.instance)
11816
11817     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11818
11819     if __debug__:
11820       # Verify owned locks
11821       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11822       nodes = frozenset(self.node_secondary_ip)
11823       assert ((self.early_release and not owned_nodes) or
11824               (not self.early_release and not (set(owned_nodes) - nodes))), \
11825         ("Not owning the correct locks, early_release=%s, owned=%r,"
11826          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11827
11828     return result
11829
11830   def _CheckVolumeGroup(self, nodes):
11831     self.lu.LogInfo("Checking volume groups")
11832
11833     vgname = self.cfg.GetVGName()
11834
11835     # Make sure volume group exists on all involved nodes
11836     results = self.rpc.call_vg_list(nodes)
11837     if not results:
11838       raise errors.OpExecError("Can't list volume groups on the nodes")
11839
11840     for node in nodes:
11841       res = results[node]
11842       res.Raise("Error checking node %s" % node)
11843       if vgname not in res.payload:
11844         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11845                                  (vgname, node))
11846
11847   def _CheckDisksExistence(self, nodes):
11848     # Check disk existence
11849     for idx, dev in enumerate(self.instance.disks):
11850       if idx not in self.disks:
11851         continue
11852
11853       for node in nodes:
11854         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11855         self.cfg.SetDiskID(dev, node)
11856
11857         result = _BlockdevFind(self, node, dev, self.instance)
11858
11859         msg = result.fail_msg
11860         if msg or not result.payload:
11861           if not msg:
11862             msg = "disk not found"
11863           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11864                                    (idx, node, msg))
11865
11866   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11867     for idx, dev in enumerate(self.instance.disks):
11868       if idx not in self.disks:
11869         continue
11870
11871       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11872                       (idx, node_name))
11873
11874       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11875                                    on_primary, ldisk=ldisk):
11876         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11877                                  " replace disks for instance %s" %
11878                                  (node_name, self.instance.name))
11879
11880   def _CreateNewStorage(self, node_name):
11881     """Create new storage on the primary or secondary node.
11882
11883     This is only used for same-node replaces, not for changing the
11884     secondary node, hence we don't want to modify the existing disk.
11885
11886     """
11887     iv_names = {}
11888
11889     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11890     for idx, dev in enumerate(disks):
11891       if idx not in self.disks:
11892         continue
11893
11894       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11895
11896       self.cfg.SetDiskID(dev, node_name)
11897
11898       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11899       names = _GenerateUniqueNames(self.lu, lv_names)
11900
11901       (data_disk, meta_disk) = dev.children
11902       vg_data = data_disk.logical_id[0]
11903       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11904                              logical_id=(vg_data, names[0]),
11905                              params=data_disk.params)
11906       vg_meta = meta_disk.logical_id[0]
11907       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11908                              size=constants.DRBD_META_SIZE,
11909                              logical_id=(vg_meta, names[1]),
11910                              params=meta_disk.params)
11911
11912       new_lvs = [lv_data, lv_meta]
11913       old_lvs = [child.Copy() for child in dev.children]
11914       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11915       excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11916
11917       # we pass force_create=True to force the LVM creation
11918       for new_lv in new_lvs:
11919         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11920                              _GetInstanceInfoText(self.instance), False,
11921                              excl_stor)
11922
11923     return iv_names
11924
11925   def _CheckDevices(self, node_name, iv_names):
11926     for name, (dev, _, _) in iv_names.iteritems():
11927       self.cfg.SetDiskID(dev, node_name)
11928
11929       result = _BlockdevFind(self, node_name, dev, self.instance)
11930
11931       msg = result.fail_msg
11932       if msg or not result.payload:
11933         if not msg:
11934           msg = "disk not found"
11935         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11936                                  (name, msg))
11937
11938       if result.payload.is_degraded:
11939         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11940
11941   def _RemoveOldStorage(self, node_name, iv_names):
11942     for name, (_, old_lvs, _) in iv_names.iteritems():
11943       self.lu.LogInfo("Remove logical volumes for %s", name)
11944
11945       for lv in old_lvs:
11946         self.cfg.SetDiskID(lv, node_name)
11947
11948         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11949         if msg:
11950           self.lu.LogWarning("Can't remove old LV: %s", msg,
11951                              hint="remove unused LVs manually")
11952
11953   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11954     """Replace a disk on the primary or secondary for DRBD 8.
11955
11956     The algorithm for replace is quite complicated:
11957
11958       1. for each disk to be replaced:
11959
11960         1. create new LVs on the target node with unique names
11961         1. detach old LVs from the drbd device
11962         1. rename old LVs to name_replaced.<time_t>
11963         1. rename new LVs to old LVs
11964         1. attach the new LVs (with the old names now) to the drbd device
11965
11966       1. wait for sync across all devices
11967
11968       1. for each modified disk:
11969
11970         1. remove old LVs (which have the name name_replaces.<time_t>)
11971
11972     Failures are not very well handled.
11973
11974     """
11975     steps_total = 6
11976
11977     # Step: check device activation
11978     self.lu.LogStep(1, steps_total, "Check device existence")
11979     self._CheckDisksExistence([self.other_node, self.target_node])
11980     self._CheckVolumeGroup([self.target_node, self.other_node])
11981
11982     # Step: check other node consistency
11983     self.lu.LogStep(2, steps_total, "Check peer consistency")
11984     self._CheckDisksConsistency(self.other_node,
11985                                 self.other_node == self.instance.primary_node,
11986                                 False)
11987
11988     # Step: create new storage
11989     self.lu.LogStep(3, steps_total, "Allocate new storage")
11990     iv_names = self._CreateNewStorage(self.target_node)
11991
11992     # Step: for each lv, detach+rename*2+attach
11993     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11994     for dev, old_lvs, new_lvs in iv_names.itervalues():
11995       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11996
11997       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11998                                                      old_lvs)
11999       result.Raise("Can't detach drbd from local storage on node"
12000                    " %s for device %s" % (self.target_node, dev.iv_name))
12001       #dev.children = []
12002       #cfg.Update(instance)
12003
12004       # ok, we created the new LVs, so now we know we have the needed
12005       # storage; as such, we proceed on the target node to rename
12006       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12007       # using the assumption that logical_id == physical_id (which in
12008       # turn is the unique_id on that node)
12009
12010       # FIXME(iustin): use a better name for the replaced LVs
12011       temp_suffix = int(time.time())
12012       ren_fn = lambda d, suff: (d.physical_id[0],
12013                                 d.physical_id[1] + "_replaced-%s" % suff)
12014
12015       # Build the rename list based on what LVs exist on the node
12016       rename_old_to_new = []
12017       for to_ren in old_lvs:
12018         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12019         if not result.fail_msg and result.payload:
12020           # device exists
12021           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12022
12023       self.lu.LogInfo("Renaming the old LVs on the target node")
12024       result = self.rpc.call_blockdev_rename(self.target_node,
12025                                              rename_old_to_new)
12026       result.Raise("Can't rename old LVs on node %s" % self.target_node)
12027
12028       # Now we rename the new LVs to the old LVs
12029       self.lu.LogInfo("Renaming the new LVs on the target node")
12030       rename_new_to_old = [(new, old.physical_id)
12031                            for old, new in zip(old_lvs, new_lvs)]
12032       result = self.rpc.call_blockdev_rename(self.target_node,
12033                                              rename_new_to_old)
12034       result.Raise("Can't rename new LVs on node %s" % self.target_node)
12035
12036       # Intermediate steps of in memory modifications
12037       for old, new in zip(old_lvs, new_lvs):
12038         new.logical_id = old.logical_id
12039         self.cfg.SetDiskID(new, self.target_node)
12040
12041       # We need to modify old_lvs so that removal later removes the
12042       # right LVs, not the newly added ones; note that old_lvs is a
12043       # copy here
12044       for disk in old_lvs:
12045         disk.logical_id = ren_fn(disk, temp_suffix)
12046         self.cfg.SetDiskID(disk, self.target_node)
12047
12048       # Now that the new lvs have the old name, we can add them to the device
12049       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12050       result = self.rpc.call_blockdev_addchildren(self.target_node,
12051                                                   (dev, self.instance), new_lvs)
12052       msg = result.fail_msg
12053       if msg:
12054         for new_lv in new_lvs:
12055           msg2 = self.rpc.call_blockdev_remove(self.target_node,
12056                                                new_lv).fail_msg
12057           if msg2:
12058             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12059                                hint=("cleanup manually the unused logical"
12060                                      "volumes"))
12061         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12062
12063     cstep = itertools.count(5)
12064
12065     if self.early_release:
12066       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12067       self._RemoveOldStorage(self.target_node, iv_names)
12068       # TODO: Check if releasing locks early still makes sense
12069       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12070     else:
12071       # Release all resource locks except those used by the instance
12072       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12073                     keep=self.node_secondary_ip.keys())
12074
12075     # Release all node locks while waiting for sync
12076     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12077
12078     # TODO: Can the instance lock be downgraded here? Take the optional disk
12079     # shutdown in the caller into consideration.
12080
12081     # Wait for sync
12082     # This can fail as the old devices are degraded and _WaitForSync
12083     # does a combined result over all disks, so we don't check its return value
12084     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12085     _WaitForSync(self.lu, self.instance)
12086
12087     # Check all devices manually
12088     self._CheckDevices(self.instance.primary_node, iv_names)
12089
12090     # Step: remove old storage
12091     if not self.early_release:
12092       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12093       self._RemoveOldStorage(self.target_node, iv_names)
12094
12095   def _ExecDrbd8Secondary(self, feedback_fn):
12096     """Replace the secondary node for DRBD 8.
12097
12098     The algorithm for replace is quite complicated:
12099       - for all disks of the instance:
12100         - create new LVs on the new node with same names
12101         - shutdown the drbd device on the old secondary
12102         - disconnect the drbd network on the primary
12103         - create the drbd device on the new secondary
12104         - network attach the drbd on the primary, using an artifice:
12105           the drbd code for Attach() will connect to the network if it
12106           finds a device which is connected to the good local disks but
12107           not network enabled
12108       - wait for sync across all devices
12109       - remove all disks from the old secondary
12110
12111     Failures are not very well handled.
12112
12113     """
12114     steps_total = 6
12115
12116     pnode = self.instance.primary_node
12117
12118     # Step: check device activation
12119     self.lu.LogStep(1, steps_total, "Check device existence")
12120     self._CheckDisksExistence([self.instance.primary_node])
12121     self._CheckVolumeGroup([self.instance.primary_node])
12122
12123     # Step: check other node consistency
12124     self.lu.LogStep(2, steps_total, "Check peer consistency")
12125     self._CheckDisksConsistency(self.instance.primary_node, True, True)
12126
12127     # Step: create new storage
12128     self.lu.LogStep(3, steps_total, "Allocate new storage")
12129     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12130     excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12131     for idx, dev in enumerate(disks):
12132       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12133                       (self.new_node, idx))
12134       # we pass force_create=True to force LVM creation
12135       for new_lv in dev.children:
12136         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12137                              True, _GetInstanceInfoText(self.instance), False,
12138                              excl_stor)
12139
12140     # Step 4: dbrd minors and drbd setups changes
12141     # after this, we must manually remove the drbd minors on both the
12142     # error and the success paths
12143     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12144     minors = self.cfg.AllocateDRBDMinor([self.new_node
12145                                          for dev in self.instance.disks],
12146                                         self.instance.name)
12147     logging.debug("Allocated minors %r", minors)
12148
12149     iv_names = {}
12150     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12151       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12152                       (self.new_node, idx))
12153       # create new devices on new_node; note that we create two IDs:
12154       # one without port, so the drbd will be activated without
12155       # networking information on the new node at this stage, and one
12156       # with network, for the latter activation in step 4
12157       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12158       if self.instance.primary_node == o_node1:
12159         p_minor = o_minor1
12160       else:
12161         assert self.instance.primary_node == o_node2, "Three-node instance?"
12162         p_minor = o_minor2
12163
12164       new_alone_id = (self.instance.primary_node, self.new_node, None,
12165                       p_minor, new_minor, o_secret)
12166       new_net_id = (self.instance.primary_node, self.new_node, o_port,
12167                     p_minor, new_minor, o_secret)
12168
12169       iv_names[idx] = (dev, dev.children, new_net_id)
12170       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12171                     new_net_id)
12172       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12173                               logical_id=new_alone_id,
12174                               children=dev.children,
12175                               size=dev.size,
12176                               params={})
12177       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12178                                              self.cfg)
12179       try:
12180         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12181                               anno_new_drbd,
12182                               _GetInstanceInfoText(self.instance), False,
12183                               excl_stor)
12184       except errors.GenericError:
12185         self.cfg.ReleaseDRBDMinors(self.instance.name)
12186         raise
12187
12188     # We have new devices, shutdown the drbd on the old secondary
12189     for idx, dev in enumerate(self.instance.disks):
12190       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12191       self.cfg.SetDiskID(dev, self.target_node)
12192       msg = self.rpc.call_blockdev_shutdown(self.target_node,
12193                                             (dev, self.instance)).fail_msg
12194       if msg:
12195         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12196                            "node: %s" % (idx, msg),
12197                            hint=("Please cleanup this device manually as"
12198                                  " soon as possible"))
12199
12200     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12201     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12202                                                self.instance.disks)[pnode]
12203
12204     msg = result.fail_msg
12205     if msg:
12206       # detaches didn't succeed (unlikely)
12207       self.cfg.ReleaseDRBDMinors(self.instance.name)
12208       raise errors.OpExecError("Can't detach the disks from the network on"
12209                                " old node: %s" % (msg,))
12210
12211     # if we managed to detach at least one, we update all the disks of
12212     # the instance to point to the new secondary
12213     self.lu.LogInfo("Updating instance configuration")
12214     for dev, _, new_logical_id in iv_names.itervalues():
12215       dev.logical_id = new_logical_id
12216       self.cfg.SetDiskID(dev, self.instance.primary_node)
12217
12218     self.cfg.Update(self.instance, feedback_fn)
12219
12220     # Release all node locks (the configuration has been updated)
12221     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12222
12223     # and now perform the drbd attach
12224     self.lu.LogInfo("Attaching primary drbds to new secondary"
12225                     " (standalone => connected)")
12226     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12227                                             self.new_node],
12228                                            self.node_secondary_ip,
12229                                            (self.instance.disks, self.instance),
12230                                            self.instance.name,
12231                                            False)
12232     for to_node, to_result in result.items():
12233       msg = to_result.fail_msg
12234       if msg:
12235         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12236                            to_node, msg,
12237                            hint=("please do a gnt-instance info to see the"
12238                                  " status of disks"))
12239
12240     cstep = itertools.count(5)
12241
12242     if self.early_release:
12243       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12244       self._RemoveOldStorage(self.target_node, iv_names)
12245       # TODO: Check if releasing locks early still makes sense
12246       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12247     else:
12248       # Release all resource locks except those used by the instance
12249       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12250                     keep=self.node_secondary_ip.keys())
12251
12252     # TODO: Can the instance lock be downgraded here? Take the optional disk
12253     # shutdown in the caller into consideration.
12254
12255     # Wait for sync
12256     # This can fail as the old devices are degraded and _WaitForSync
12257     # does a combined result over all disks, so we don't check its return value
12258     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12259     _WaitForSync(self.lu, self.instance)
12260
12261     # Check all devices manually
12262     self._CheckDevices(self.instance.primary_node, iv_names)
12263
12264     # Step: remove old storage
12265     if not self.early_release:
12266       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12267       self._RemoveOldStorage(self.target_node, iv_names)
12268
12269
12270 class LURepairNodeStorage(NoHooksLU):
12271   """Repairs the volume group on a node.
12272
12273   """
12274   REQ_BGL = False
12275
12276   def CheckArguments(self):
12277     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12278
12279     storage_type = self.op.storage_type
12280
12281     if (constants.SO_FIX_CONSISTENCY not in
12282         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12283       raise errors.OpPrereqError("Storage units of type '%s' can not be"
12284                                  " repaired" % storage_type,
12285                                  errors.ECODE_INVAL)
12286
12287   def ExpandNames(self):
12288     self.needed_locks = {
12289       locking.LEVEL_NODE: [self.op.node_name],
12290       }
12291
12292   def _CheckFaultyDisks(self, instance, node_name):
12293     """Ensure faulty disks abort the opcode or at least warn."""
12294     try:
12295       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12296                                   node_name, True):
12297         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12298                                    " node '%s'" % (instance.name, node_name),
12299                                    errors.ECODE_STATE)
12300     except errors.OpPrereqError, err:
12301       if self.op.ignore_consistency:
12302         self.LogWarning(str(err.args[0]))
12303       else:
12304         raise
12305
12306   def CheckPrereq(self):
12307     """Check prerequisites.
12308
12309     """
12310     # Check whether any instance on this node has faulty disks
12311     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12312       if inst.admin_state != constants.ADMINST_UP:
12313         continue
12314       check_nodes = set(inst.all_nodes)
12315       check_nodes.discard(self.op.node_name)
12316       for inst_node_name in check_nodes:
12317         self._CheckFaultyDisks(inst, inst_node_name)
12318
12319   def Exec(self, feedback_fn):
12320     feedback_fn("Repairing storage unit '%s' on %s ..." %
12321                 (self.op.name, self.op.node_name))
12322
12323     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12324     result = self.rpc.call_storage_execute(self.op.node_name,
12325                                            self.op.storage_type, st_args,
12326                                            self.op.name,
12327                                            constants.SO_FIX_CONSISTENCY)
12328     result.Raise("Failed to repair storage unit '%s' on %s" %
12329                  (self.op.name, self.op.node_name))
12330
12331
12332 class LUNodeEvacuate(NoHooksLU):
12333   """Evacuates instances off a list of nodes.
12334
12335   """
12336   REQ_BGL = False
12337
12338   _MODE2IALLOCATOR = {
12339     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12340     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12341     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12342     }
12343   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12344   assert (frozenset(_MODE2IALLOCATOR.values()) ==
12345           constants.IALLOCATOR_NEVAC_MODES)
12346
12347   def CheckArguments(self):
12348     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12349
12350   def ExpandNames(self):
12351     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12352
12353     if self.op.remote_node is not None:
12354       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12355       assert self.op.remote_node
12356
12357       if self.op.remote_node == self.op.node_name:
12358         raise errors.OpPrereqError("Can not use evacuated node as a new"
12359                                    " secondary node", errors.ECODE_INVAL)
12360
12361       if self.op.mode != constants.NODE_EVAC_SEC:
12362         raise errors.OpPrereqError("Without the use of an iallocator only"
12363                                    " secondary instances can be evacuated",
12364                                    errors.ECODE_INVAL)
12365
12366     # Declare locks
12367     self.share_locks = _ShareAll()
12368     self.needed_locks = {
12369       locking.LEVEL_INSTANCE: [],
12370       locking.LEVEL_NODEGROUP: [],
12371       locking.LEVEL_NODE: [],
12372       }
12373
12374     # Determine nodes (via group) optimistically, needs verification once locks
12375     # have been acquired
12376     self.lock_nodes = self._DetermineNodes()
12377
12378   def _DetermineNodes(self):
12379     """Gets the list of nodes to operate on.
12380
12381     """
12382     if self.op.remote_node is None:
12383       # Iallocator will choose any node(s) in the same group
12384       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12385     else:
12386       group_nodes = frozenset([self.op.remote_node])
12387
12388     # Determine nodes to be locked
12389     return set([self.op.node_name]) | group_nodes
12390
12391   def _DetermineInstances(self):
12392     """Builds list of instances to operate on.
12393
12394     """
12395     assert self.op.mode in constants.NODE_EVAC_MODES
12396
12397     if self.op.mode == constants.NODE_EVAC_PRI:
12398       # Primary instances only
12399       inst_fn = _GetNodePrimaryInstances
12400       assert self.op.remote_node is None, \
12401         "Evacuating primary instances requires iallocator"
12402     elif self.op.mode == constants.NODE_EVAC_SEC:
12403       # Secondary instances only
12404       inst_fn = _GetNodeSecondaryInstances
12405     else:
12406       # All instances
12407       assert self.op.mode == constants.NODE_EVAC_ALL
12408       inst_fn = _GetNodeInstances
12409       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12410       # per instance
12411       raise errors.OpPrereqError("Due to an issue with the iallocator"
12412                                  " interface it is not possible to evacuate"
12413                                  " all instances at once; specify explicitly"
12414                                  " whether to evacuate primary or secondary"
12415                                  " instances",
12416                                  errors.ECODE_INVAL)
12417
12418     return inst_fn(self.cfg, self.op.node_name)
12419
12420   def DeclareLocks(self, level):
12421     if level == locking.LEVEL_INSTANCE:
12422       # Lock instances optimistically, needs verification once node and group
12423       # locks have been acquired
12424       self.needed_locks[locking.LEVEL_INSTANCE] = \
12425         set(i.name for i in self._DetermineInstances())
12426
12427     elif level == locking.LEVEL_NODEGROUP:
12428       # Lock node groups for all potential target nodes optimistically, needs
12429       # verification once nodes have been acquired
12430       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12431         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12432
12433     elif level == locking.LEVEL_NODE:
12434       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12435
12436   def CheckPrereq(self):
12437     # Verify locks
12438     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12439     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12440     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12441
12442     need_nodes = self._DetermineNodes()
12443
12444     if not owned_nodes.issuperset(need_nodes):
12445       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12446                                  " locks were acquired, current nodes are"
12447                                  " are '%s', used to be '%s'; retry the"
12448                                  " operation" %
12449                                  (self.op.node_name,
12450                                   utils.CommaJoin(need_nodes),
12451                                   utils.CommaJoin(owned_nodes)),
12452                                  errors.ECODE_STATE)
12453
12454     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12455     if owned_groups != wanted_groups:
12456       raise errors.OpExecError("Node groups changed since locks were acquired,"
12457                                " current groups are '%s', used to be '%s';"
12458                                " retry the operation" %
12459                                (utils.CommaJoin(wanted_groups),
12460                                 utils.CommaJoin(owned_groups)))
12461
12462     # Determine affected instances
12463     self.instances = self._DetermineInstances()
12464     self.instance_names = [i.name for i in self.instances]
12465
12466     if set(self.instance_names) != owned_instances:
12467       raise errors.OpExecError("Instances on node '%s' changed since locks"
12468                                " were acquired, current instances are '%s',"
12469                                " used to be '%s'; retry the operation" %
12470                                (self.op.node_name,
12471                                 utils.CommaJoin(self.instance_names),
12472                                 utils.CommaJoin(owned_instances)))
12473
12474     if self.instance_names:
12475       self.LogInfo("Evacuating instances from node '%s': %s",
12476                    self.op.node_name,
12477                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
12478     else:
12479       self.LogInfo("No instances to evacuate from node '%s'",
12480                    self.op.node_name)
12481
12482     if self.op.remote_node is not None:
12483       for i in self.instances:
12484         if i.primary_node == self.op.remote_node:
12485           raise errors.OpPrereqError("Node %s is the primary node of"
12486                                      " instance %s, cannot use it as"
12487                                      " secondary" %
12488                                      (self.op.remote_node, i.name),
12489                                      errors.ECODE_INVAL)
12490
12491   def Exec(self, feedback_fn):
12492     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12493
12494     if not self.instance_names:
12495       # No instances to evacuate
12496       jobs = []
12497
12498     elif self.op.iallocator is not None:
12499       # TODO: Implement relocation to other group
12500       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12501       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12502                                      instances=list(self.instance_names))
12503       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12504
12505       ial.Run(self.op.iallocator)
12506
12507       if not ial.success:
12508         raise errors.OpPrereqError("Can't compute node evacuation using"
12509                                    " iallocator '%s': %s" %
12510                                    (self.op.iallocator, ial.info),
12511                                    errors.ECODE_NORES)
12512
12513       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12514
12515     elif self.op.remote_node is not None:
12516       assert self.op.mode == constants.NODE_EVAC_SEC
12517       jobs = [
12518         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12519                                         remote_node=self.op.remote_node,
12520                                         disks=[],
12521                                         mode=constants.REPLACE_DISK_CHG,
12522                                         early_release=self.op.early_release)]
12523         for instance_name in self.instance_names]
12524
12525     else:
12526       raise errors.ProgrammerError("No iallocator or remote node")
12527
12528     return ResultWithJobs(jobs)
12529
12530
12531 def _SetOpEarlyRelease(early_release, op):
12532   """Sets C{early_release} flag on opcodes if available.
12533
12534   """
12535   try:
12536     op.early_release = early_release
12537   except AttributeError:
12538     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12539
12540   return op
12541
12542
12543 def _NodeEvacDest(use_nodes, group, nodes):
12544   """Returns group or nodes depending on caller's choice.
12545
12546   """
12547   if use_nodes:
12548     return utils.CommaJoin(nodes)
12549   else:
12550     return group
12551
12552
12553 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12554   """Unpacks the result of change-group and node-evacuate iallocator requests.
12555
12556   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12557   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12558
12559   @type lu: L{LogicalUnit}
12560   @param lu: Logical unit instance
12561   @type alloc_result: tuple/list
12562   @param alloc_result: Result from iallocator
12563   @type early_release: bool
12564   @param early_release: Whether to release locks early if possible
12565   @type use_nodes: bool
12566   @param use_nodes: Whether to display node names instead of groups
12567
12568   """
12569   (moved, failed, jobs) = alloc_result
12570
12571   if failed:
12572     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12573                                  for (name, reason) in failed)
12574     lu.LogWarning("Unable to evacuate instances %s", failreason)
12575     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12576
12577   if moved:
12578     lu.LogInfo("Instances to be moved: %s",
12579                utils.CommaJoin("%s (to %s)" %
12580                                (name, _NodeEvacDest(use_nodes, group, nodes))
12581                                for (name, group, nodes) in moved))
12582
12583   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12584               map(opcodes.OpCode.LoadOpCode, ops))
12585           for ops in jobs]
12586
12587
12588 def _DiskSizeInBytesToMebibytes(lu, size):
12589   """Converts a disk size in bytes to mebibytes.
12590
12591   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12592
12593   """
12594   (mib, remainder) = divmod(size, 1024 * 1024)
12595
12596   if remainder != 0:
12597     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12598                   " to not overwrite existing data (%s bytes will not be"
12599                   " wiped)", (1024 * 1024) - remainder)
12600     mib += 1
12601
12602   return mib
12603
12604
12605 class LUInstanceGrowDisk(LogicalUnit):
12606   """Grow a disk of an instance.
12607
12608   """
12609   HPATH = "disk-grow"
12610   HTYPE = constants.HTYPE_INSTANCE
12611   REQ_BGL = False
12612
12613   def ExpandNames(self):
12614     self._ExpandAndLockInstance()
12615     self.needed_locks[locking.LEVEL_NODE] = []
12616     self.needed_locks[locking.LEVEL_NODE_RES] = []
12617     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12618     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12619
12620   def DeclareLocks(self, level):
12621     if level == locking.LEVEL_NODE:
12622       self._LockInstancesNodes()
12623     elif level == locking.LEVEL_NODE_RES:
12624       # Copy node locks
12625       self.needed_locks[locking.LEVEL_NODE_RES] = \
12626         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12627
12628   def BuildHooksEnv(self):
12629     """Build hooks env.
12630
12631     This runs on the master, the primary and all the secondaries.
12632
12633     """
12634     env = {
12635       "DISK": self.op.disk,
12636       "AMOUNT": self.op.amount,
12637       "ABSOLUTE": self.op.absolute,
12638       }
12639     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12640     return env
12641
12642   def BuildHooksNodes(self):
12643     """Build hooks nodes.
12644
12645     """
12646     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12647     return (nl, nl)
12648
12649   def CheckPrereq(self):
12650     """Check prerequisites.
12651
12652     This checks that the instance is in the cluster.
12653
12654     """
12655     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12656     assert instance is not None, \
12657       "Cannot retrieve locked instance %s" % self.op.instance_name
12658     nodenames = list(instance.all_nodes)
12659     for node in nodenames:
12660       _CheckNodeOnline(self, node)
12661
12662     self.instance = instance
12663
12664     if instance.disk_template not in constants.DTS_GROWABLE:
12665       raise errors.OpPrereqError("Instance's disk layout does not support"
12666                                  " growing", errors.ECODE_INVAL)
12667
12668     self.disk = instance.FindDisk(self.op.disk)
12669
12670     if self.op.absolute:
12671       self.target = self.op.amount
12672       self.delta = self.target - self.disk.size
12673       if self.delta < 0:
12674         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12675                                    "current disk size (%s)" %
12676                                    (utils.FormatUnit(self.target, "h"),
12677                                     utils.FormatUnit(self.disk.size, "h")),
12678                                    errors.ECODE_STATE)
12679     else:
12680       self.delta = self.op.amount
12681       self.target = self.disk.size + self.delta
12682       if self.delta < 0:
12683         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12684                                    utils.FormatUnit(self.delta, "h"),
12685                                    errors.ECODE_INVAL)
12686
12687     self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12688
12689   def _CheckDiskSpace(self, nodenames, req_vgspace):
12690     template = self.instance.disk_template
12691     if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12692       # TODO: check the free disk space for file, when that feature will be
12693       # supported
12694       nodes = map(self.cfg.GetNodeInfo, nodenames)
12695       es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12696                         nodes)
12697       if es_nodes:
12698         # With exclusive storage we need to something smarter than just looking
12699         # at free space; for now, let's simply abort the operation.
12700         raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12701                                    " is enabled", errors.ECODE_STATE)
12702       _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12703
12704   def Exec(self, feedback_fn):
12705     """Execute disk grow.
12706
12707     """
12708     instance = self.instance
12709     disk = self.disk
12710
12711     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12712     assert (self.owned_locks(locking.LEVEL_NODE) ==
12713             self.owned_locks(locking.LEVEL_NODE_RES))
12714
12715     wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12716
12717     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12718     if not disks_ok:
12719       raise errors.OpExecError("Cannot activate block device to grow")
12720
12721     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12722                 (self.op.disk, instance.name,
12723                  utils.FormatUnit(self.delta, "h"),
12724                  utils.FormatUnit(self.target, "h")))
12725
12726     # First run all grow ops in dry-run mode
12727     for node in instance.all_nodes:
12728       self.cfg.SetDiskID(disk, node)
12729       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12730                                            True, True)
12731       result.Raise("Dry-run grow request failed to node %s" % node)
12732
12733     if wipe_disks:
12734       # Get disk size from primary node for wiping
12735       result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12736       result.Raise("Failed to retrieve disk size from node '%s'" %
12737                    instance.primary_node)
12738
12739       (disk_size_in_bytes, ) = result.payload
12740
12741       if disk_size_in_bytes is None:
12742         raise errors.OpExecError("Failed to retrieve disk size from primary"
12743                                  " node '%s'" % instance.primary_node)
12744
12745       old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12746
12747       assert old_disk_size >= disk.size, \
12748         ("Retrieved disk size too small (got %s, should be at least %s)" %
12749          (old_disk_size, disk.size))
12750     else:
12751       old_disk_size = None
12752
12753     # We know that (as far as we can test) operations across different
12754     # nodes will succeed, time to run it for real on the backing storage
12755     for node in instance.all_nodes:
12756       self.cfg.SetDiskID(disk, node)
12757       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12758                                            False, True)
12759       result.Raise("Grow request failed to node %s" % node)
12760
12761     # And now execute it for logical storage, on the primary node
12762     node = instance.primary_node
12763     self.cfg.SetDiskID(disk, node)
12764     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12765                                          False, False)
12766     result.Raise("Grow request failed to node %s" % node)
12767
12768     disk.RecordGrow(self.delta)
12769     self.cfg.Update(instance, feedback_fn)
12770
12771     # Changes have been recorded, release node lock
12772     _ReleaseLocks(self, locking.LEVEL_NODE)
12773
12774     # Downgrade lock while waiting for sync
12775     self.glm.downgrade(locking.LEVEL_INSTANCE)
12776
12777     assert wipe_disks ^ (old_disk_size is None)
12778
12779     if wipe_disks:
12780       assert instance.disks[self.op.disk] == disk
12781
12782       # Wipe newly added disk space
12783       _WipeDisks(self, instance,
12784                  disks=[(self.op.disk, disk, old_disk_size)])
12785
12786     if self.op.wait_for_sync:
12787       disk_abort = not _WaitForSync(self, instance, disks=[disk])
12788       if disk_abort:
12789         self.LogWarning("Disk syncing has not returned a good status; check"
12790                         " the instance")
12791       if instance.admin_state != constants.ADMINST_UP:
12792         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12793     elif instance.admin_state != constants.ADMINST_UP:
12794       self.LogWarning("Not shutting down the disk even if the instance is"
12795                       " not supposed to be running because no wait for"
12796                       " sync mode was requested")
12797
12798     assert self.owned_locks(locking.LEVEL_NODE_RES)
12799     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12800
12801
12802 class LUInstanceQueryData(NoHooksLU):
12803   """Query runtime instance data.
12804
12805   """
12806   REQ_BGL = False
12807
12808   def ExpandNames(self):
12809     self.needed_locks = {}
12810
12811     # Use locking if requested or when non-static information is wanted
12812     if not (self.op.static or self.op.use_locking):
12813       self.LogWarning("Non-static data requested, locks need to be acquired")
12814       self.op.use_locking = True
12815
12816     if self.op.instances or not self.op.use_locking:
12817       # Expand instance names right here
12818       self.wanted_names = _GetWantedInstances(self, self.op.instances)
12819     else:
12820       # Will use acquired locks
12821       self.wanted_names = None
12822
12823     if self.op.use_locking:
12824       self.share_locks = _ShareAll()
12825
12826       if self.wanted_names is None:
12827         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12828       else:
12829         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12830
12831       self.needed_locks[locking.LEVEL_NODEGROUP] = []
12832       self.needed_locks[locking.LEVEL_NODE] = []
12833       self.needed_locks[locking.LEVEL_NETWORK] = []
12834       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12835
12836   def DeclareLocks(self, level):
12837     if self.op.use_locking:
12838       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12839       if level == locking.LEVEL_NODEGROUP:
12840
12841         # Lock all groups used by instances optimistically; this requires going
12842         # via the node before it's locked, requiring verification later on
12843         self.needed_locks[locking.LEVEL_NODEGROUP] = \
12844           frozenset(group_uuid
12845                     for instance_name in owned_instances
12846                     for group_uuid in
12847                       self.cfg.GetInstanceNodeGroups(instance_name))
12848
12849       elif level == locking.LEVEL_NODE:
12850         self._LockInstancesNodes()
12851
12852       elif level == locking.LEVEL_NETWORK:
12853         self.needed_locks[locking.LEVEL_NETWORK] = \
12854           frozenset(net_uuid
12855                     for instance_name in owned_instances
12856                     for net_uuid in
12857                        self.cfg.GetInstanceNetworks(instance_name))
12858
12859   def CheckPrereq(self):
12860     """Check prerequisites.
12861
12862     This only checks the optional instance list against the existing names.
12863
12864     """
12865     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12866     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12867     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12868     owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12869
12870     if self.wanted_names is None:
12871       assert self.op.use_locking, "Locking was not used"
12872       self.wanted_names = owned_instances
12873
12874     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12875
12876     if self.op.use_locking:
12877       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12878                                 None)
12879     else:
12880       assert not (owned_instances or owned_groups or
12881                   owned_nodes or owned_networks)
12882
12883     self.wanted_instances = instances.values()
12884
12885   def _ComputeBlockdevStatus(self, node, instance, dev):
12886     """Returns the status of a block device
12887
12888     """
12889     if self.op.static or not node:
12890       return None
12891
12892     self.cfg.SetDiskID(dev, node)
12893
12894     result = self.rpc.call_blockdev_find(node, dev)
12895     if result.offline:
12896       return None
12897
12898     result.Raise("Can't compute disk status for %s" % instance.name)
12899
12900     status = result.payload
12901     if status is None:
12902       return None
12903
12904     return (status.dev_path, status.major, status.minor,
12905             status.sync_percent, status.estimated_time,
12906             status.is_degraded, status.ldisk_status)
12907
12908   def _ComputeDiskStatus(self, instance, snode, dev):
12909     """Compute block device status.
12910
12911     """
12912     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12913
12914     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12915
12916   def _ComputeDiskStatusInner(self, instance, snode, dev):
12917     """Compute block device status.
12918
12919     @attention: The device has to be annotated already.
12920
12921     """
12922     if dev.dev_type in constants.LDS_DRBD:
12923       # we change the snode then (otherwise we use the one passed in)
12924       if dev.logical_id[0] == instance.primary_node:
12925         snode = dev.logical_id[1]
12926       else:
12927         snode = dev.logical_id[0]
12928
12929     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12930                                               instance, dev)
12931     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12932
12933     if dev.children:
12934       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12935                                         instance, snode),
12936                          dev.children)
12937     else:
12938       dev_children = []
12939
12940     return {
12941       "iv_name": dev.iv_name,
12942       "dev_type": dev.dev_type,
12943       "logical_id": dev.logical_id,
12944       "physical_id": dev.physical_id,
12945       "pstatus": dev_pstatus,
12946       "sstatus": dev_sstatus,
12947       "children": dev_children,
12948       "mode": dev.mode,
12949       "size": dev.size,
12950       }
12951
12952   def Exec(self, feedback_fn):
12953     """Gather and return data"""
12954     result = {}
12955
12956     cluster = self.cfg.GetClusterInfo()
12957
12958     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12959     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12960
12961     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12962                                                  for node in nodes.values()))
12963
12964     group2name_fn = lambda uuid: groups[uuid].name
12965     for instance in self.wanted_instances:
12966       pnode = nodes[instance.primary_node]
12967
12968       if self.op.static or pnode.offline:
12969         remote_state = None
12970         if pnode.offline:
12971           self.LogWarning("Primary node %s is marked offline, returning static"
12972                           " information only for instance %s" %
12973                           (pnode.name, instance.name))
12974       else:
12975         remote_info = self.rpc.call_instance_info(instance.primary_node,
12976                                                   instance.name,
12977                                                   instance.hypervisor)
12978         remote_info.Raise("Error checking node %s" % instance.primary_node)
12979         remote_info = remote_info.payload
12980         if remote_info and "state" in remote_info:
12981           remote_state = "up"
12982         else:
12983           if instance.admin_state == constants.ADMINST_UP:
12984             remote_state = "down"
12985           else:
12986             remote_state = instance.admin_state
12987
12988       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12989                   instance.disks)
12990
12991       snodes_group_uuids = [nodes[snode_name].group
12992                             for snode_name in instance.secondary_nodes]
12993
12994       result[instance.name] = {
12995         "name": instance.name,
12996         "config_state": instance.admin_state,
12997         "run_state": remote_state,
12998         "pnode": instance.primary_node,
12999         "pnode_group_uuid": pnode.group,
13000         "pnode_group_name": group2name_fn(pnode.group),
13001         "snodes": instance.secondary_nodes,
13002         "snodes_group_uuids": snodes_group_uuids,
13003         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13004         "os": instance.os,
13005         # this happens to be the same format used for hooks
13006         "nics": _NICListToTuple(self, instance.nics),
13007         "disk_template": instance.disk_template,
13008         "disks": disks,
13009         "hypervisor": instance.hypervisor,
13010         "network_port": instance.network_port,
13011         "hv_instance": instance.hvparams,
13012         "hv_actual": cluster.FillHV(instance, skip_globals=True),
13013         "be_instance": instance.beparams,
13014         "be_actual": cluster.FillBE(instance),
13015         "os_instance": instance.osparams,
13016         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13017         "serial_no": instance.serial_no,
13018         "mtime": instance.mtime,
13019         "ctime": instance.ctime,
13020         "uuid": instance.uuid,
13021         }
13022
13023     return result
13024
13025
13026 def PrepareContainerMods(mods, private_fn):
13027   """Prepares a list of container modifications by adding a private data field.
13028
13029   @type mods: list of tuples; (operation, index, parameters)
13030   @param mods: List of modifications
13031   @type private_fn: callable or None
13032   @param private_fn: Callable for constructing a private data field for a
13033     modification
13034   @rtype: list
13035
13036   """
13037   if private_fn is None:
13038     fn = lambda: None
13039   else:
13040     fn = private_fn
13041
13042   return [(op, idx, params, fn()) for (op, idx, params) in mods]
13043
13044
13045 #: Type description for changes as returned by L{ApplyContainerMods}'s
13046 #: callbacks
13047 _TApplyContModsCbChanges = \
13048   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13049     ht.TNonEmptyString,
13050     ht.TAny,
13051     ])))
13052
13053
13054 def ApplyContainerMods(kind, container, chgdesc, mods,
13055                        create_fn, modify_fn, remove_fn):
13056   """Applies descriptions in C{mods} to C{container}.
13057
13058   @type kind: string
13059   @param kind: One-word item description
13060   @type container: list
13061   @param container: Container to modify
13062   @type chgdesc: None or list
13063   @param chgdesc: List of applied changes
13064   @type mods: list
13065   @param mods: Modifications as returned by L{PrepareContainerMods}
13066   @type create_fn: callable
13067   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13068     receives absolute item index, parameters and private data object as added
13069     by L{PrepareContainerMods}, returns tuple containing new item and changes
13070     as list
13071   @type modify_fn: callable
13072   @param modify_fn: Callback for modifying an existing item
13073     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13074     and private data object as added by L{PrepareContainerMods}, returns
13075     changes as list
13076   @type remove_fn: callable
13077   @param remove_fn: Callback on removing item; receives absolute item index,
13078     item and private data object as added by L{PrepareContainerMods}
13079
13080   """
13081   for (op, idx, params, private) in mods:
13082     if idx == -1:
13083       # Append
13084       absidx = len(container) - 1
13085     elif idx < 0:
13086       raise IndexError("Not accepting negative indices other than -1")
13087     elif idx > len(container):
13088       raise IndexError("Got %s index %s, but there are only %s" %
13089                        (kind, idx, len(container)))
13090     else:
13091       absidx = idx
13092
13093     changes = None
13094
13095     if op == constants.DDM_ADD:
13096       # Calculate where item will be added
13097       if idx == -1:
13098         addidx = len(container)
13099       else:
13100         addidx = idx
13101
13102       if create_fn is None:
13103         item = params
13104       else:
13105         (item, changes) = create_fn(addidx, params, private)
13106
13107       if idx == -1:
13108         container.append(item)
13109       else:
13110         assert idx >= 0
13111         assert idx <= len(container)
13112         # list.insert does so before the specified index
13113         container.insert(idx, item)
13114     else:
13115       # Retrieve existing item
13116       try:
13117         item = container[absidx]
13118       except IndexError:
13119         raise IndexError("Invalid %s index %s" % (kind, idx))
13120
13121       if op == constants.DDM_REMOVE:
13122         assert not params
13123
13124         if remove_fn is not None:
13125           remove_fn(absidx, item, private)
13126
13127         changes = [("%s/%s" % (kind, absidx), "remove")]
13128
13129         assert container[absidx] == item
13130         del container[absidx]
13131       elif op == constants.DDM_MODIFY:
13132         if modify_fn is not None:
13133           changes = modify_fn(absidx, item, params, private)
13134       else:
13135         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13136
13137     assert _TApplyContModsCbChanges(changes)
13138
13139     if not (chgdesc is None or changes is None):
13140       chgdesc.extend(changes)
13141
13142
13143 def _UpdateIvNames(base_index, disks):
13144   """Updates the C{iv_name} attribute of disks.
13145
13146   @type disks: list of L{objects.Disk}
13147
13148   """
13149   for (idx, disk) in enumerate(disks):
13150     disk.iv_name = "disk/%s" % (base_index + idx, )
13151
13152
13153 class _InstNicModPrivate:
13154   """Data structure for network interface modifications.
13155
13156   Used by L{LUInstanceSetParams}.
13157
13158   """
13159   def __init__(self):
13160     self.params = None
13161     self.filled = None
13162
13163
13164 class LUInstanceSetParams(LogicalUnit):
13165   """Modifies an instances's parameters.
13166
13167   """
13168   HPATH = "instance-modify"
13169   HTYPE = constants.HTYPE_INSTANCE
13170   REQ_BGL = False
13171
13172   @staticmethod
13173   def _UpgradeDiskNicMods(kind, mods, verify_fn):
13174     assert ht.TList(mods)
13175     assert not mods or len(mods[0]) in (2, 3)
13176
13177     if mods and len(mods[0]) == 2:
13178       result = []
13179
13180       addremove = 0
13181       for op, params in mods:
13182         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13183           result.append((op, -1, params))
13184           addremove += 1
13185
13186           if addremove > 1:
13187             raise errors.OpPrereqError("Only one %s add or remove operation is"
13188                                        " supported at a time" % kind,
13189                                        errors.ECODE_INVAL)
13190         else:
13191           result.append((constants.DDM_MODIFY, op, params))
13192
13193       assert verify_fn(result)
13194     else:
13195       result = mods
13196
13197     return result
13198
13199   @staticmethod
13200   def _CheckMods(kind, mods, key_types, item_fn):
13201     """Ensures requested disk/NIC modifications are valid.
13202
13203     """
13204     for (op, _, params) in mods:
13205       assert ht.TDict(params)
13206
13207       # If 'key_types' is an empty dict, we assume we have an
13208       # 'ext' template and thus do not ForceDictType
13209       if key_types:
13210         utils.ForceDictType(params, key_types)
13211
13212       if op == constants.DDM_REMOVE:
13213         if params:
13214           raise errors.OpPrereqError("No settings should be passed when"
13215                                      " removing a %s" % kind,
13216                                      errors.ECODE_INVAL)
13217       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13218         item_fn(op, params)
13219       else:
13220         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13221
13222   @staticmethod
13223   def _VerifyDiskModification(op, params):
13224     """Verifies a disk modification.
13225
13226     """
13227     if op == constants.DDM_ADD:
13228       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13229       if mode not in constants.DISK_ACCESS_SET:
13230         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13231                                    errors.ECODE_INVAL)
13232
13233       size = params.get(constants.IDISK_SIZE, None)
13234       if size is None:
13235         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13236                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
13237
13238       try:
13239         size = int(size)
13240       except (TypeError, ValueError), err:
13241         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13242                                    errors.ECODE_INVAL)
13243
13244       params[constants.IDISK_SIZE] = size
13245
13246     elif op == constants.DDM_MODIFY:
13247       if constants.IDISK_SIZE in params:
13248         raise errors.OpPrereqError("Disk size change not possible, use"
13249                                    " grow-disk", errors.ECODE_INVAL)
13250       if constants.IDISK_MODE not in params:
13251         raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13252                                    " modification supported, but missing",
13253                                    errors.ECODE_NOENT)
13254       if len(params) > 1:
13255         raise errors.OpPrereqError("Disk modification doesn't support"
13256                                    " additional arbitrary parameters",
13257                                    errors.ECODE_INVAL)
13258
13259   @staticmethod
13260   def _VerifyNicModification(op, params):
13261     """Verifies a network interface modification.
13262
13263     """
13264     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13265       ip = params.get(constants.INIC_IP, None)
13266       req_net = params.get(constants.INIC_NETWORK, None)
13267       link = params.get(constants.NIC_LINK, None)
13268       mode = params.get(constants.NIC_MODE, None)
13269       if req_net is not None:
13270         if req_net.lower() == constants.VALUE_NONE:
13271           params[constants.INIC_NETWORK] = None
13272           req_net = None
13273         elif link is not None or mode is not None:
13274           raise errors.OpPrereqError("If network is given"
13275                                      " mode or link should not",
13276                                      errors.ECODE_INVAL)
13277
13278       if op == constants.DDM_ADD:
13279         macaddr = params.get(constants.INIC_MAC, None)
13280         if macaddr is None:
13281           params[constants.INIC_MAC] = constants.VALUE_AUTO
13282
13283       if ip is not None:
13284         if ip.lower() == constants.VALUE_NONE:
13285           params[constants.INIC_IP] = None
13286         else:
13287           if ip.lower() == constants.NIC_IP_POOL:
13288             if op == constants.DDM_ADD and req_net is None:
13289               raise errors.OpPrereqError("If ip=pool, parameter network"
13290                                          " cannot be none",
13291                                          errors.ECODE_INVAL)
13292           else:
13293             if not netutils.IPAddress.IsValid(ip):
13294               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13295                                          errors.ECODE_INVAL)
13296
13297       if constants.INIC_MAC in params:
13298         macaddr = params[constants.INIC_MAC]
13299         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13300           macaddr = utils.NormalizeAndValidateMac(macaddr)
13301
13302         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13303           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13304                                      " modifying an existing NIC",
13305                                      errors.ECODE_INVAL)
13306
13307   def CheckArguments(self):
13308     if not (self.op.nics or self.op.disks or self.op.disk_template or
13309             self.op.hvparams or self.op.beparams or self.op.os_name or
13310             self.op.offline is not None or self.op.runtime_mem):
13311       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13312
13313     if self.op.hvparams:
13314       _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13315                             "hypervisor", "instance", "cluster")
13316
13317     self.op.disks = self._UpgradeDiskNicMods(
13318       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13319     self.op.nics = self._UpgradeDiskNicMods(
13320       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13321
13322     if self.op.disks and self.op.disk_template is not None:
13323       raise errors.OpPrereqError("Disk template conversion and other disk"
13324                                  " changes not supported at the same time",
13325                                  errors.ECODE_INVAL)
13326
13327     if (self.op.disk_template and
13328         self.op.disk_template in constants.DTS_INT_MIRROR and
13329         self.op.remote_node is None):
13330       raise errors.OpPrereqError("Changing the disk template to a mirrored"
13331                                  " one requires specifying a secondary node",
13332                                  errors.ECODE_INVAL)
13333
13334     # Check NIC modifications
13335     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13336                     self._VerifyNicModification)
13337
13338   def ExpandNames(self):
13339     self._ExpandAndLockInstance()
13340     self.needed_locks[locking.LEVEL_NODEGROUP] = []
13341     # Can't even acquire node locks in shared mode as upcoming changes in
13342     # Ganeti 2.6 will start to modify the node object on disk conversion
13343     self.needed_locks[locking.LEVEL_NODE] = []
13344     self.needed_locks[locking.LEVEL_NODE_RES] = []
13345     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13346     # Look node group to look up the ipolicy
13347     self.share_locks[locking.LEVEL_NODEGROUP] = 1
13348
13349   def DeclareLocks(self, level):
13350     if level == locking.LEVEL_NODEGROUP:
13351       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13352       # Acquire locks for the instance's nodegroups optimistically. Needs
13353       # to be verified in CheckPrereq
13354       self.needed_locks[locking.LEVEL_NODEGROUP] = \
13355         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13356     elif level == locking.LEVEL_NODE:
13357       self._LockInstancesNodes()
13358       if self.op.disk_template and self.op.remote_node:
13359         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13360         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13361     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13362       # Copy node locks
13363       self.needed_locks[locking.LEVEL_NODE_RES] = \
13364         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13365
13366   def BuildHooksEnv(self):
13367     """Build hooks env.
13368
13369     This runs on the master, primary and secondaries.
13370
13371     """
13372     args = {}
13373     if constants.BE_MINMEM in self.be_new:
13374       args["minmem"] = self.be_new[constants.BE_MINMEM]
13375     if constants.BE_MAXMEM in self.be_new:
13376       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13377     if constants.BE_VCPUS in self.be_new:
13378       args["vcpus"] = self.be_new[constants.BE_VCPUS]
13379     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13380     # information at all.
13381
13382     if self._new_nics is not None:
13383       nics = []
13384
13385       for nic in self._new_nics:
13386         n = copy.deepcopy(nic)
13387         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13388         n.nicparams = nicparams
13389         nics.append(_NICToTuple(self, n))
13390
13391       args["nics"] = nics
13392
13393     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13394     if self.op.disk_template:
13395       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13396     if self.op.runtime_mem:
13397       env["RUNTIME_MEMORY"] = self.op.runtime_mem
13398
13399     return env
13400
13401   def BuildHooksNodes(self):
13402     """Build hooks nodes.
13403
13404     """
13405     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13406     return (nl, nl)
13407
13408   def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13409                               old_params, cluster, pnode):
13410
13411     update_params_dict = dict([(key, params[key])
13412                                for key in constants.NICS_PARAMETERS
13413                                if key in params])
13414
13415     req_link = update_params_dict.get(constants.NIC_LINK, None)
13416     req_mode = update_params_dict.get(constants.NIC_MODE, None)
13417
13418     new_net_uuid = None
13419     new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13420     if new_net_uuid_or_name:
13421       new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13422       new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13423
13424     if old_net_uuid:
13425       old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13426
13427     if new_net_uuid:
13428       netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13429       if not netparams:
13430         raise errors.OpPrereqError("No netparams found for the network"
13431                                    " %s, probably not connected" %
13432                                    new_net_obj.name, errors.ECODE_INVAL)
13433       new_params = dict(netparams)
13434     else:
13435       new_params = _GetUpdatedParams(old_params, update_params_dict)
13436
13437     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13438
13439     new_filled_params = cluster.SimpleFillNIC(new_params)
13440     objects.NIC.CheckParameterSyntax(new_filled_params)
13441
13442     new_mode = new_filled_params[constants.NIC_MODE]
13443     if new_mode == constants.NIC_MODE_BRIDGED:
13444       bridge = new_filled_params[constants.NIC_LINK]
13445       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13446       if msg:
13447         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13448         if self.op.force:
13449           self.warn.append(msg)
13450         else:
13451           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13452
13453     elif new_mode == constants.NIC_MODE_ROUTED:
13454       ip = params.get(constants.INIC_IP, old_ip)
13455       if ip is None:
13456         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13457                                    " on a routed NIC", errors.ECODE_INVAL)
13458
13459     elif new_mode == constants.NIC_MODE_OVS:
13460       # TODO: check OVS link
13461       self.LogInfo("OVS links are currently not checked for correctness")
13462
13463     if constants.INIC_MAC in params:
13464       mac = params[constants.INIC_MAC]
13465       if mac is None:
13466         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13467                                    errors.ECODE_INVAL)
13468       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13469         # otherwise generate the MAC address
13470         params[constants.INIC_MAC] = \
13471           self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13472       else:
13473         # or validate/reserve the current one
13474         try:
13475           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13476         except errors.ReservationError:
13477           raise errors.OpPrereqError("MAC address '%s' already in use"
13478                                      " in cluster" % mac,
13479                                      errors.ECODE_NOTUNIQUE)
13480     elif new_net_uuid != old_net_uuid:
13481
13482       def get_net_prefix(net_uuid):
13483         mac_prefix = None
13484         if net_uuid:
13485           nobj = self.cfg.GetNetwork(net_uuid)
13486           mac_prefix = nobj.mac_prefix
13487
13488         return mac_prefix
13489
13490       new_prefix = get_net_prefix(new_net_uuid)
13491       old_prefix = get_net_prefix(old_net_uuid)
13492       if old_prefix != new_prefix:
13493         params[constants.INIC_MAC] = \
13494           self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13495
13496     # if there is a change in (ip, network) tuple
13497     new_ip = params.get(constants.INIC_IP, old_ip)
13498     if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13499       if new_ip:
13500         # if IP is pool then require a network and generate one IP
13501         if new_ip.lower() == constants.NIC_IP_POOL:
13502           if new_net_uuid:
13503             try:
13504               new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13505             except errors.ReservationError:
13506               raise errors.OpPrereqError("Unable to get a free IP"
13507                                          " from the address pool",
13508                                          errors.ECODE_STATE)
13509             self.LogInfo("Chose IP %s from network %s",
13510                          new_ip,
13511                          new_net_obj.name)
13512             params[constants.INIC_IP] = new_ip
13513           else:
13514             raise errors.OpPrereqError("ip=pool, but no network found",
13515                                        errors.ECODE_INVAL)
13516         # Reserve new IP if in the new network if any
13517         elif new_net_uuid:
13518           try:
13519             self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13520             self.LogInfo("Reserving IP %s in network %s",
13521                          new_ip, new_net_obj.name)
13522           except errors.ReservationError:
13523             raise errors.OpPrereqError("IP %s not available in network %s" %
13524                                        (new_ip, new_net_obj.name),
13525                                        errors.ECODE_NOTUNIQUE)
13526         # new network is None so check if new IP is a conflicting IP
13527         elif self.op.conflicts_check:
13528           _CheckForConflictingIp(self, new_ip, pnode)
13529
13530       # release old IP if old network is not None
13531       if old_ip and old_net_uuid:
13532         try:
13533           self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13534         except errors.AddressPoolError:
13535           logging.warning("Release IP %s not contained in network %s",
13536                           old_ip, old_net_obj.name)
13537
13538     # there are no changes in (ip, network) tuple and old network is not None
13539     elif (old_net_uuid is not None and
13540           (req_link is not None or req_mode is not None)):
13541       raise errors.OpPrereqError("Not allowed to change link or mode of"
13542                                  " a NIC that is connected to a network",
13543                                  errors.ECODE_INVAL)
13544
13545     private.params = new_params
13546     private.filled = new_filled_params
13547
13548   def _PreCheckDiskTemplate(self, pnode_info):
13549     """CheckPrereq checks related to a new disk template."""
13550     # Arguments are passed to avoid configuration lookups
13551     instance = self.instance
13552     pnode = instance.primary_node
13553     cluster = self.cluster
13554     if instance.disk_template == self.op.disk_template:
13555       raise errors.OpPrereqError("Instance already has disk template %s" %
13556                                  instance.disk_template, errors.ECODE_INVAL)
13557
13558     if (instance.disk_template,
13559         self.op.disk_template) not in self._DISK_CONVERSIONS:
13560       raise errors.OpPrereqError("Unsupported disk template conversion from"
13561                                  " %s to %s" % (instance.disk_template,
13562                                                 self.op.disk_template),
13563                                  errors.ECODE_INVAL)
13564     _CheckInstanceState(self, instance, INSTANCE_DOWN,
13565                         msg="cannot change disk template")
13566     if self.op.disk_template in constants.DTS_INT_MIRROR:
13567       if self.op.remote_node == pnode:
13568         raise errors.OpPrereqError("Given new secondary node %s is the same"
13569                                    " as the primary node of the instance" %
13570                                    self.op.remote_node, errors.ECODE_STATE)
13571       _CheckNodeOnline(self, self.op.remote_node)
13572       _CheckNodeNotDrained(self, self.op.remote_node)
13573       # FIXME: here we assume that the old instance type is DT_PLAIN
13574       assert instance.disk_template == constants.DT_PLAIN
13575       disks = [{constants.IDISK_SIZE: d.size,
13576                 constants.IDISK_VG: d.logical_id[0]}
13577                for d in instance.disks]
13578       required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13579       _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13580
13581       snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13582       snode_group = self.cfg.GetNodeGroup(snode_info.group)
13583       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13584                                                               snode_group)
13585       _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13586                               ignore=self.op.ignore_ipolicy)
13587       if pnode_info.group != snode_info.group:
13588         self.LogWarning("The primary and secondary nodes are in two"
13589                         " different node groups; the disk parameters"
13590                         " from the first disk's node group will be"
13591                         " used")
13592
13593     if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13594       # Make sure none of the nodes require exclusive storage
13595       nodes = [pnode_info]
13596       if self.op.disk_template in constants.DTS_INT_MIRROR:
13597         assert snode_info
13598         nodes.append(snode_info)
13599       has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13600       if compat.any(map(has_es, nodes)):
13601         errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13602                   " storage is enabled" % (instance.disk_template,
13603                                            self.op.disk_template))
13604         raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13605
13606   def CheckPrereq(self):
13607     """Check prerequisites.
13608
13609     This only checks the instance list against the existing names.
13610
13611     """
13612     assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13613     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13614
13615     cluster = self.cluster = self.cfg.GetClusterInfo()
13616     assert self.instance is not None, \
13617       "Cannot retrieve locked instance %s" % self.op.instance_name
13618
13619     pnode = instance.primary_node
13620     assert pnode in self.owned_locks(locking.LEVEL_NODE)
13621     nodelist = list(instance.all_nodes)
13622     pnode_info = self.cfg.GetNodeInfo(pnode)
13623     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13624
13625     #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13626     assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13627     group_info = self.cfg.GetNodeGroup(pnode_info.group)
13628
13629     # dictionary with instance information after the modification
13630     ispec = {}
13631
13632     # Check disk modifications. This is done here and not in CheckArguments
13633     # (as with NICs), because we need to know the instance's disk template
13634     if instance.disk_template == constants.DT_EXT:
13635       self._CheckMods("disk", self.op.disks, {},
13636                       self._VerifyDiskModification)
13637     else:
13638       self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13639                       self._VerifyDiskModification)
13640
13641     # Prepare disk/NIC modifications
13642     self.diskmod = PrepareContainerMods(self.op.disks, None)
13643     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13644
13645     # Check the validity of the `provider' parameter
13646     if instance.disk_template in constants.DT_EXT:
13647       for mod in self.diskmod:
13648         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13649         if mod[0] == constants.DDM_ADD:
13650           if ext_provider is None:
13651             raise errors.OpPrereqError("Instance template is '%s' and parameter"
13652                                        " '%s' missing, during disk add" %
13653                                        (constants.DT_EXT,
13654                                         constants.IDISK_PROVIDER),
13655                                        errors.ECODE_NOENT)
13656         elif mod[0] == constants.DDM_MODIFY:
13657           if ext_provider:
13658             raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13659                                        " modification" %
13660                                        constants.IDISK_PROVIDER,
13661                                        errors.ECODE_INVAL)
13662     else:
13663       for mod in self.diskmod:
13664         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13665         if ext_provider is not None:
13666           raise errors.OpPrereqError("Parameter '%s' is only valid for"
13667                                      " instances of type '%s'" %
13668                                      (constants.IDISK_PROVIDER,
13669                                       constants.DT_EXT),
13670                                      errors.ECODE_INVAL)
13671
13672     # OS change
13673     if self.op.os_name and not self.op.force:
13674       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13675                       self.op.force_variant)
13676       instance_os = self.op.os_name
13677     else:
13678       instance_os = instance.os
13679
13680     assert not (self.op.disk_template and self.op.disks), \
13681       "Can't modify disk template and apply disk changes at the same time"
13682
13683     if self.op.disk_template:
13684       self._PreCheckDiskTemplate(pnode_info)
13685
13686     # hvparams processing
13687     if self.op.hvparams:
13688       hv_type = instance.hypervisor
13689       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13690       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13691       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13692
13693       # local check
13694       hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13695       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13696       self.hv_proposed = self.hv_new = hv_new # the new actual values
13697       self.hv_inst = i_hvdict # the new dict (without defaults)
13698     else:
13699       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13700                                               instance.hvparams)
13701       self.hv_new = self.hv_inst = {}
13702
13703     # beparams processing
13704     if self.op.beparams:
13705       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13706                                    use_none=True)
13707       objects.UpgradeBeParams(i_bedict)
13708       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13709       be_new = cluster.SimpleFillBE(i_bedict)
13710       self.be_proposed = self.be_new = be_new # the new actual values
13711       self.be_inst = i_bedict # the new dict (without defaults)
13712     else:
13713       self.be_new = self.be_inst = {}
13714       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13715     be_old = cluster.FillBE(instance)
13716
13717     # CPU param validation -- checking every time a parameter is
13718     # changed to cover all cases where either CPU mask or vcpus have
13719     # changed
13720     if (constants.BE_VCPUS in self.be_proposed and
13721         constants.HV_CPU_MASK in self.hv_proposed):
13722       cpu_list = \
13723         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13724       # Verify mask is consistent with number of vCPUs. Can skip this
13725       # test if only 1 entry in the CPU mask, which means same mask
13726       # is applied to all vCPUs.
13727       if (len(cpu_list) > 1 and
13728           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13729         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13730                                    " CPU mask [%s]" %
13731                                    (self.be_proposed[constants.BE_VCPUS],
13732                                     self.hv_proposed[constants.HV_CPU_MASK]),
13733                                    errors.ECODE_INVAL)
13734
13735       # Only perform this test if a new CPU mask is given
13736       if constants.HV_CPU_MASK in self.hv_new:
13737         # Calculate the largest CPU number requested
13738         max_requested_cpu = max(map(max, cpu_list))
13739         # Check that all of the instance's nodes have enough physical CPUs to
13740         # satisfy the requested CPU mask
13741         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13742                                 max_requested_cpu + 1, instance.hypervisor)
13743
13744     # osparams processing
13745     if self.op.osparams:
13746       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13747       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13748       self.os_inst = i_osdict # the new dict (without defaults)
13749     else:
13750       self.os_inst = {}
13751
13752     self.warn = []
13753
13754     #TODO(dynmem): do the appropriate check involving MINMEM
13755     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13756         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13757       mem_check_list = [pnode]
13758       if be_new[constants.BE_AUTO_BALANCE]:
13759         # either we changed auto_balance to yes or it was from before
13760         mem_check_list.extend(instance.secondary_nodes)
13761       instance_info = self.rpc.call_instance_info(pnode, instance.name,
13762                                                   instance.hypervisor)
13763       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13764                                          [instance.hypervisor], False)
13765       pninfo = nodeinfo[pnode]
13766       msg = pninfo.fail_msg
13767       if msg:
13768         # Assume the primary node is unreachable and go ahead
13769         self.warn.append("Can't get info from primary node %s: %s" %
13770                          (pnode, msg))
13771       else:
13772         (_, _, (pnhvinfo, )) = pninfo.payload
13773         if not isinstance(pnhvinfo.get("memory_free", None), int):
13774           self.warn.append("Node data from primary node %s doesn't contain"
13775                            " free memory information" % pnode)
13776         elif instance_info.fail_msg:
13777           self.warn.append("Can't get instance runtime information: %s" %
13778                            instance_info.fail_msg)
13779         else:
13780           if instance_info.payload:
13781             current_mem = int(instance_info.payload["memory"])
13782           else:
13783             # Assume instance not running
13784             # (there is a slight race condition here, but it's not very
13785             # probable, and we have no other way to check)
13786             # TODO: Describe race condition
13787             current_mem = 0
13788           #TODO(dynmem): do the appropriate check involving MINMEM
13789           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13790                       pnhvinfo["memory_free"])
13791           if miss_mem > 0:
13792             raise errors.OpPrereqError("This change will prevent the instance"
13793                                        " from starting, due to %d MB of memory"
13794                                        " missing on its primary node" %
13795                                        miss_mem, errors.ECODE_NORES)
13796
13797       if be_new[constants.BE_AUTO_BALANCE]:
13798         for node, nres in nodeinfo.items():
13799           if node not in instance.secondary_nodes:
13800             continue
13801           nres.Raise("Can't get info from secondary node %s" % node,
13802                      prereq=True, ecode=errors.ECODE_STATE)
13803           (_, _, (nhvinfo, )) = nres.payload
13804           if not isinstance(nhvinfo.get("memory_free", None), int):
13805             raise errors.OpPrereqError("Secondary node %s didn't return free"
13806                                        " memory information" % node,
13807                                        errors.ECODE_STATE)
13808           #TODO(dynmem): do the appropriate check involving MINMEM
13809           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13810             raise errors.OpPrereqError("This change will prevent the instance"
13811                                        " from failover to its secondary node"
13812                                        " %s, due to not enough memory" % node,
13813                                        errors.ECODE_STATE)
13814
13815     if self.op.runtime_mem:
13816       remote_info = self.rpc.call_instance_info(instance.primary_node,
13817                                                 instance.name,
13818                                                 instance.hypervisor)
13819       remote_info.Raise("Error checking node %s" % instance.primary_node)
13820       if not remote_info.payload: # not running already
13821         raise errors.OpPrereqError("Instance %s is not running" %
13822                                    instance.name, errors.ECODE_STATE)
13823
13824       current_memory = remote_info.payload["memory"]
13825       if (not self.op.force and
13826            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13827             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13828         raise errors.OpPrereqError("Instance %s must have memory between %d"
13829                                    " and %d MB of memory unless --force is"
13830                                    " given" %
13831                                    (instance.name,
13832                                     self.be_proposed[constants.BE_MINMEM],
13833                                     self.be_proposed[constants.BE_MAXMEM]),
13834                                    errors.ECODE_INVAL)
13835
13836       delta = self.op.runtime_mem - current_memory
13837       if delta > 0:
13838         _CheckNodeFreeMemory(self, instance.primary_node,
13839                              "ballooning memory for instance %s" %
13840                              instance.name, delta, instance.hypervisor)
13841
13842     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13843       raise errors.OpPrereqError("Disk operations not supported for"
13844                                  " diskless instances", errors.ECODE_INVAL)
13845
13846     def _PrepareNicCreate(_, params, private):
13847       self._PrepareNicModification(params, private, None, None,
13848                                    {}, cluster, pnode)
13849       return (None, None)
13850
13851     def _PrepareNicMod(_, nic, params, private):
13852       self._PrepareNicModification(params, private, nic.ip, nic.network,
13853                                    nic.nicparams, cluster, pnode)
13854       return None
13855
13856     def _PrepareNicRemove(_, params, __):
13857       ip = params.ip
13858       net = params.network
13859       if net is not None and ip is not None:
13860         self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13861
13862     # Verify NIC changes (operating on copy)
13863     nics = instance.nics[:]
13864     ApplyContainerMods("NIC", nics, None, self.nicmod,
13865                        _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13866     if len(nics) > constants.MAX_NICS:
13867       raise errors.OpPrereqError("Instance has too many network interfaces"
13868                                  " (%d), cannot add more" % constants.MAX_NICS,
13869                                  errors.ECODE_STATE)
13870
13871     # Verify disk changes (operating on a copy)
13872     disks = instance.disks[:]
13873     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13874     if len(disks) > constants.MAX_DISKS:
13875       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13876                                  " more" % constants.MAX_DISKS,
13877                                  errors.ECODE_STATE)
13878     disk_sizes = [disk.size for disk in instance.disks]
13879     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13880                       self.diskmod if op == constants.DDM_ADD)
13881     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13882     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13883
13884     if self.op.offline is not None and self.op.offline:
13885       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13886                           msg="can't change to offline")
13887
13888     # Pre-compute NIC changes (necessary to use result in hooks)
13889     self._nic_chgdesc = []
13890     if self.nicmod:
13891       # Operate on copies as this is still in prereq
13892       nics = [nic.Copy() for nic in instance.nics]
13893       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13894                          self._CreateNewNic, self._ApplyNicMods, None)
13895       self._new_nics = nics
13896       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13897     else:
13898       self._new_nics = None
13899       ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13900
13901     if not self.op.ignore_ipolicy:
13902       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13903                                                               group_info)
13904
13905       # Fill ispec with backend parameters
13906       ispec[constants.ISPEC_SPINDLE_USE] = \
13907         self.be_new.get(constants.BE_SPINDLE_USE, None)
13908       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13909                                                          None)
13910
13911       # Copy ispec to verify parameters with min/max values separately
13912       ispec_max = ispec.copy()
13913       ispec_max[constants.ISPEC_MEM_SIZE] = \
13914         self.be_new.get(constants.BE_MAXMEM, None)
13915       res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13916       ispec_min = ispec.copy()
13917       ispec_min[constants.ISPEC_MEM_SIZE] = \
13918         self.be_new.get(constants.BE_MINMEM, None)
13919       res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13920
13921       if (res_max or res_min):
13922         # FIXME: Improve error message by including information about whether
13923         # the upper or lower limit of the parameter fails the ipolicy.
13924         msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13925                (group_info, group_info.name,
13926                 utils.CommaJoin(set(res_max + res_min))))
13927         raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13928
13929   def _ConvertPlainToDrbd(self, feedback_fn):
13930     """Converts an instance from plain to drbd.
13931
13932     """
13933     feedback_fn("Converting template to drbd")
13934     instance = self.instance
13935     pnode = instance.primary_node
13936     snode = self.op.remote_node
13937
13938     assert instance.disk_template == constants.DT_PLAIN
13939
13940     # create a fake disk info for _GenerateDiskTemplate
13941     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13942                   constants.IDISK_VG: d.logical_id[0]}
13943                  for d in instance.disks]
13944     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13945                                       instance.name, pnode, [snode],
13946                                       disk_info, None, None, 0, feedback_fn,
13947                                       self.diskparams)
13948     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13949                                         self.diskparams)
13950     p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13951     s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13952     info = _GetInstanceInfoText(instance)
13953     feedback_fn("Creating additional volumes...")
13954     # first, create the missing data and meta devices
13955     for disk in anno_disks:
13956       # unfortunately this is... not too nice
13957       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13958                             info, True, p_excl_stor)
13959       for child in disk.children:
13960         _CreateSingleBlockDev(self, snode, instance, child, info, True,
13961                               s_excl_stor)
13962     # at this stage, all new LVs have been created, we can rename the
13963     # old ones
13964     feedback_fn("Renaming original volumes...")
13965     rename_list = [(o, n.children[0].logical_id)
13966                    for (o, n) in zip(instance.disks, new_disks)]
13967     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13968     result.Raise("Failed to rename original LVs")
13969
13970     feedback_fn("Initializing DRBD devices...")
13971     # all child devices are in place, we can now create the DRBD devices
13972     for disk in anno_disks:
13973       for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13974         f_create = node == pnode
13975         _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13976                               excl_stor)
13977
13978     # at this point, the instance has been modified
13979     instance.disk_template = constants.DT_DRBD8
13980     instance.disks = new_disks
13981     self.cfg.Update(instance, feedback_fn)
13982
13983     # Release node locks while waiting for sync
13984     _ReleaseLocks(self, locking.LEVEL_NODE)
13985
13986     # disks are created, waiting for sync
13987     disk_abort = not _WaitForSync(self, instance,
13988                                   oneshot=not self.op.wait_for_sync)
13989     if disk_abort:
13990       raise errors.OpExecError("There are some degraded disks for"
13991                                " this instance, please cleanup manually")
13992
13993     # Node resource locks will be released by caller
13994
13995   def _ConvertDrbdToPlain(self, feedback_fn):
13996     """Converts an instance from drbd to plain.
13997
13998     """
13999     instance = self.instance
14000
14001     assert len(instance.secondary_nodes) == 1
14002     assert instance.disk_template == constants.DT_DRBD8
14003
14004     pnode = instance.primary_node
14005     snode = instance.secondary_nodes[0]
14006     feedback_fn("Converting template to plain")
14007
14008     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14009     new_disks = [d.children[0] for d in instance.disks]
14010
14011     # copy over size and mode
14012     for parent, child in zip(old_disks, new_disks):
14013       child.size = parent.size
14014       child.mode = parent.mode
14015
14016     # this is a DRBD disk, return its port to the pool
14017     # NOTE: this must be done right before the call to cfg.Update!
14018     for disk in old_disks:
14019       tcp_port = disk.logical_id[2]
14020       self.cfg.AddTcpUdpPort(tcp_port)
14021
14022     # update instance structure
14023     instance.disks = new_disks
14024     instance.disk_template = constants.DT_PLAIN
14025     self.cfg.Update(instance, feedback_fn)
14026
14027     # Release locks in case removing disks takes a while
14028     _ReleaseLocks(self, locking.LEVEL_NODE)
14029
14030     feedback_fn("Removing volumes on the secondary node...")
14031     for disk in old_disks:
14032       self.cfg.SetDiskID(disk, snode)
14033       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14034       if msg:
14035         self.LogWarning("Could not remove block device %s on node %s,"
14036                         " continuing anyway: %s", disk.iv_name, snode, msg)
14037
14038     feedback_fn("Removing unneeded volumes on the primary node...")
14039     for idx, disk in enumerate(old_disks):
14040       meta = disk.children[1]
14041       self.cfg.SetDiskID(meta, pnode)
14042       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14043       if msg:
14044         self.LogWarning("Could not remove metadata for disk %d on node %s,"
14045                         " continuing anyway: %s", idx, pnode, msg)
14046
14047   def _CreateNewDisk(self, idx, params, _):
14048     """Creates a new disk.
14049
14050     """
14051     instance = self.instance
14052
14053     # add a new disk
14054     if instance.disk_template in constants.DTS_FILEBASED:
14055       (file_driver, file_path) = instance.disks[0].logical_id
14056       file_path = os.path.dirname(file_path)
14057     else:
14058       file_driver = file_path = None
14059
14060     disk = \
14061       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14062                             instance.primary_node, instance.secondary_nodes,
14063                             [params], file_path, file_driver, idx,
14064                             self.Log, self.diskparams)[0]
14065
14066     info = _GetInstanceInfoText(instance)
14067
14068     logging.info("Creating volume %s for instance %s",
14069                  disk.iv_name, instance.name)
14070     # Note: this needs to be kept in sync with _CreateDisks
14071     #HARDCODE
14072     for node in instance.all_nodes:
14073       f_create = (node == instance.primary_node)
14074       try:
14075         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14076       except errors.OpExecError, err:
14077         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14078                         disk.iv_name, disk, node, err)
14079
14080     return (disk, [
14081       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14082       ])
14083
14084   @staticmethod
14085   def _ModifyDisk(idx, disk, params, _):
14086     """Modifies a disk.
14087
14088     """
14089     disk.mode = params[constants.IDISK_MODE]
14090
14091     return [
14092       ("disk.mode/%d" % idx, disk.mode),
14093       ]
14094
14095   def _RemoveDisk(self, idx, root, _):
14096     """Removes a disk.
14097
14098     """
14099     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14100     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14101       self.cfg.SetDiskID(disk, node)
14102       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14103       if msg:
14104         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14105                         " continuing anyway", idx, node, msg)
14106
14107     # if this is a DRBD disk, return its port to the pool
14108     if root.dev_type in constants.LDS_DRBD:
14109       self.cfg.AddTcpUdpPort(root.logical_id[2])
14110
14111   def _CreateNewNic(self, idx, params, private):
14112     """Creates data structure for a new network interface.
14113
14114     """
14115     mac = params[constants.INIC_MAC]
14116     ip = params.get(constants.INIC_IP, None)
14117     net = params.get(constants.INIC_NETWORK, None)
14118     net_uuid = self.cfg.LookupNetwork(net)
14119     #TODO: not private.filled?? can a nic have no nicparams??
14120     nicparams = private.filled
14121     nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14122
14123     return (nobj, [
14124       ("nic.%d" % idx,
14125        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14126        (mac, ip, private.filled[constants.NIC_MODE],
14127        private.filled[constants.NIC_LINK],
14128        net)),
14129       ])
14130
14131   def _ApplyNicMods(self, idx, nic, params, private):
14132     """Modifies a network interface.
14133
14134     """
14135     changes = []
14136
14137     for key in [constants.INIC_MAC, constants.INIC_IP]:
14138       if key in params:
14139         changes.append(("nic.%s/%d" % (key, idx), params[key]))
14140         setattr(nic, key, params[key])
14141
14142     new_net = params.get(constants.INIC_NETWORK, nic.network)
14143     new_net_uuid = self.cfg.LookupNetwork(new_net)
14144     if new_net_uuid != nic.network:
14145       changes.append(("nic.network/%d" % idx, new_net))
14146       nic.network = new_net_uuid
14147
14148     if private.filled:
14149       nic.nicparams = private.filled
14150
14151       for (key, val) in nic.nicparams.items():
14152         changes.append(("nic.%s/%d" % (key, idx), val))
14153
14154     return changes
14155
14156   def Exec(self, feedback_fn):
14157     """Modifies an instance.
14158
14159     All parameters take effect only at the next restart of the instance.
14160
14161     """
14162     # Process here the warnings from CheckPrereq, as we don't have a
14163     # feedback_fn there.
14164     # TODO: Replace with self.LogWarning
14165     for warn in self.warn:
14166       feedback_fn("WARNING: %s" % warn)
14167
14168     assert ((self.op.disk_template is None) ^
14169             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14170       "Not owning any node resource locks"
14171
14172     result = []
14173     instance = self.instance
14174
14175     # runtime memory
14176     if self.op.runtime_mem:
14177       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14178                                                      instance,
14179                                                      self.op.runtime_mem)
14180       rpcres.Raise("Cannot modify instance runtime memory")
14181       result.append(("runtime_memory", self.op.runtime_mem))
14182
14183     # Apply disk changes
14184     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14185                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14186     _UpdateIvNames(0, instance.disks)
14187
14188     if self.op.disk_template:
14189       if __debug__:
14190         check_nodes = set(instance.all_nodes)
14191         if self.op.remote_node:
14192           check_nodes.add(self.op.remote_node)
14193         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14194           owned = self.owned_locks(level)
14195           assert not (check_nodes - owned), \
14196             ("Not owning the correct locks, owning %r, expected at least %r" %
14197              (owned, check_nodes))
14198
14199       r_shut = _ShutdownInstanceDisks(self, instance)
14200       if not r_shut:
14201         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14202                                  " proceed with disk template conversion")
14203       mode = (instance.disk_template, self.op.disk_template)
14204       try:
14205         self._DISK_CONVERSIONS[mode](self, feedback_fn)
14206       except:
14207         self.cfg.ReleaseDRBDMinors(instance.name)
14208         raise
14209       result.append(("disk_template", self.op.disk_template))
14210
14211       assert instance.disk_template == self.op.disk_template, \
14212         ("Expected disk template '%s', found '%s'" %
14213          (self.op.disk_template, instance.disk_template))
14214
14215     # Release node and resource locks if there are any (they might already have
14216     # been released during disk conversion)
14217     _ReleaseLocks(self, locking.LEVEL_NODE)
14218     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14219
14220     # Apply NIC changes
14221     if self._new_nics is not None:
14222       instance.nics = self._new_nics
14223       result.extend(self._nic_chgdesc)
14224
14225     # hvparams changes
14226     if self.op.hvparams:
14227       instance.hvparams = self.hv_inst
14228       for key, val in self.op.hvparams.iteritems():
14229         result.append(("hv/%s" % key, val))
14230
14231     # beparams changes
14232     if self.op.beparams:
14233       instance.beparams = self.be_inst
14234       for key, val in self.op.beparams.iteritems():
14235         result.append(("be/%s" % key, val))
14236
14237     # OS change
14238     if self.op.os_name:
14239       instance.os = self.op.os_name
14240
14241     # osparams changes
14242     if self.op.osparams:
14243       instance.osparams = self.os_inst
14244       for key, val in self.op.osparams.iteritems():
14245         result.append(("os/%s" % key, val))
14246
14247     if self.op.offline is None:
14248       # Ignore
14249       pass
14250     elif self.op.offline:
14251       # Mark instance as offline
14252       self.cfg.MarkInstanceOffline(instance.name)
14253       result.append(("admin_state", constants.ADMINST_OFFLINE))
14254     else:
14255       # Mark instance as online, but stopped
14256       self.cfg.MarkInstanceDown(instance.name)
14257       result.append(("admin_state", constants.ADMINST_DOWN))
14258
14259     self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14260
14261     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14262                 self.owned_locks(locking.LEVEL_NODE)), \
14263       "All node locks should have been released by now"
14264
14265     return result
14266
14267   _DISK_CONVERSIONS = {
14268     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14269     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14270     }
14271
14272
14273 class LUInstanceChangeGroup(LogicalUnit):
14274   HPATH = "instance-change-group"
14275   HTYPE = constants.HTYPE_INSTANCE
14276   REQ_BGL = False
14277
14278   def ExpandNames(self):
14279     self.share_locks = _ShareAll()
14280
14281     self.needed_locks = {
14282       locking.LEVEL_NODEGROUP: [],
14283       locking.LEVEL_NODE: [],
14284       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14285       }
14286
14287     self._ExpandAndLockInstance()
14288
14289     if self.op.target_groups:
14290       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14291                                   self.op.target_groups)
14292     else:
14293       self.req_target_uuids = None
14294
14295     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14296
14297   def DeclareLocks(self, level):
14298     if level == locking.LEVEL_NODEGROUP:
14299       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14300
14301       if self.req_target_uuids:
14302         lock_groups = set(self.req_target_uuids)
14303
14304         # Lock all groups used by instance optimistically; this requires going
14305         # via the node before it's locked, requiring verification later on
14306         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14307         lock_groups.update(instance_groups)
14308       else:
14309         # No target groups, need to lock all of them
14310         lock_groups = locking.ALL_SET
14311
14312       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14313
14314     elif level == locking.LEVEL_NODE:
14315       if self.req_target_uuids:
14316         # Lock all nodes used by instances
14317         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14318         self._LockInstancesNodes()
14319
14320         # Lock all nodes in all potential target groups
14321         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14322                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14323         member_nodes = [node_name
14324                         for group in lock_groups
14325                         for node_name in self.cfg.GetNodeGroup(group).members]
14326         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14327       else:
14328         # Lock all nodes as all groups are potential targets
14329         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14330
14331   def CheckPrereq(self):
14332     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14333     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14334     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14335
14336     assert (self.req_target_uuids is None or
14337             owned_groups.issuperset(self.req_target_uuids))
14338     assert owned_instances == set([self.op.instance_name])
14339
14340     # Get instance information
14341     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14342
14343     # Check if node groups for locked instance are still correct
14344     assert owned_nodes.issuperset(self.instance.all_nodes), \
14345       ("Instance %s's nodes changed while we kept the lock" %
14346        self.op.instance_name)
14347
14348     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14349                                            owned_groups)
14350
14351     if self.req_target_uuids:
14352       # User requested specific target groups
14353       self.target_uuids = frozenset(self.req_target_uuids)
14354     else:
14355       # All groups except those used by the instance are potential targets
14356       self.target_uuids = owned_groups - inst_groups
14357
14358     conflicting_groups = self.target_uuids & inst_groups
14359     if conflicting_groups:
14360       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14361                                  " used by the instance '%s'" %
14362                                  (utils.CommaJoin(conflicting_groups),
14363                                   self.op.instance_name),
14364                                  errors.ECODE_INVAL)
14365
14366     if not self.target_uuids:
14367       raise errors.OpPrereqError("There are no possible target groups",
14368                                  errors.ECODE_INVAL)
14369
14370   def BuildHooksEnv(self):
14371     """Build hooks env.
14372
14373     """
14374     assert self.target_uuids
14375
14376     env = {
14377       "TARGET_GROUPS": " ".join(self.target_uuids),
14378       }
14379
14380     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14381
14382     return env
14383
14384   def BuildHooksNodes(self):
14385     """Build hooks nodes.
14386
14387     """
14388     mn = self.cfg.GetMasterNode()
14389     return ([mn], [mn])
14390
14391   def Exec(self, feedback_fn):
14392     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14393
14394     assert instances == [self.op.instance_name], "Instance not locked"
14395
14396     req = iallocator.IAReqGroupChange(instances=instances,
14397                                       target_groups=list(self.target_uuids))
14398     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14399
14400     ial.Run(self.op.iallocator)
14401
14402     if not ial.success:
14403       raise errors.OpPrereqError("Can't compute solution for changing group of"
14404                                  " instance '%s' using iallocator '%s': %s" %
14405                                  (self.op.instance_name, self.op.iallocator,
14406                                   ial.info), errors.ECODE_NORES)
14407
14408     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14409
14410     self.LogInfo("Iallocator returned %s job(s) for changing group of"
14411                  " instance '%s'", len(jobs), self.op.instance_name)
14412
14413     return ResultWithJobs(jobs)
14414
14415
14416 class LUBackupQuery(NoHooksLU):
14417   """Query the exports list
14418
14419   """
14420   REQ_BGL = False
14421
14422   def CheckArguments(self):
14423     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14424                              ["node", "export"], self.op.use_locking)
14425
14426   def ExpandNames(self):
14427     self.expq.ExpandNames(self)
14428
14429   def DeclareLocks(self, level):
14430     self.expq.DeclareLocks(self, level)
14431
14432   def Exec(self, feedback_fn):
14433     result = {}
14434
14435     for (node, expname) in self.expq.OldStyleQuery(self):
14436       if expname is None:
14437         result[node] = False
14438       else:
14439         result.setdefault(node, []).append(expname)
14440
14441     return result
14442
14443
14444 class _ExportQuery(_QueryBase):
14445   FIELDS = query.EXPORT_FIELDS
14446
14447   #: The node name is not a unique key for this query
14448   SORT_FIELD = "node"
14449
14450   def ExpandNames(self, lu):
14451     lu.needed_locks = {}
14452
14453     # The following variables interact with _QueryBase._GetNames
14454     if self.names:
14455       self.wanted = _GetWantedNodes(lu, self.names)
14456     else:
14457       self.wanted = locking.ALL_SET
14458
14459     self.do_locking = self.use_locking
14460
14461     if self.do_locking:
14462       lu.share_locks = _ShareAll()
14463       lu.needed_locks = {
14464         locking.LEVEL_NODE: self.wanted,
14465         }
14466
14467       if not self.names:
14468         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14469
14470   def DeclareLocks(self, lu, level):
14471     pass
14472
14473   def _GetQueryData(self, lu):
14474     """Computes the list of nodes and their attributes.
14475
14476     """
14477     # Locking is not used
14478     # TODO
14479     assert not (compat.any(lu.glm.is_owned(level)
14480                            for level in locking.LEVELS
14481                            if level != locking.LEVEL_CLUSTER) or
14482                 self.do_locking or self.use_locking)
14483
14484     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14485
14486     result = []
14487
14488     for (node, nres) in lu.rpc.call_export_list(nodes).items():
14489       if nres.fail_msg:
14490         result.append((node, None))
14491       else:
14492         result.extend((node, expname) for expname in nres.payload)
14493
14494     return result
14495
14496
14497 class LUBackupPrepare(NoHooksLU):
14498   """Prepares an instance for an export and returns useful information.
14499
14500   """
14501   REQ_BGL = False
14502
14503   def ExpandNames(self):
14504     self._ExpandAndLockInstance()
14505
14506   def CheckPrereq(self):
14507     """Check prerequisites.
14508
14509     """
14510     instance_name = self.op.instance_name
14511
14512     self.instance = self.cfg.GetInstanceInfo(instance_name)
14513     assert self.instance is not None, \
14514           "Cannot retrieve locked instance %s" % self.op.instance_name
14515     _CheckNodeOnline(self, self.instance.primary_node)
14516
14517     self._cds = _GetClusterDomainSecret()
14518
14519   def Exec(self, feedback_fn):
14520     """Prepares an instance for an export.
14521
14522     """
14523     instance = self.instance
14524
14525     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14526       salt = utils.GenerateSecret(8)
14527
14528       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14529       result = self.rpc.call_x509_cert_create(instance.primary_node,
14530                                               constants.RIE_CERT_VALIDITY)
14531       result.Raise("Can't create X509 key and certificate on %s" % result.node)
14532
14533       (name, cert_pem) = result.payload
14534
14535       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14536                                              cert_pem)
14537
14538       return {
14539         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14540         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14541                           salt),
14542         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14543         }
14544
14545     return None
14546
14547
14548 class LUBackupExport(LogicalUnit):
14549   """Export an instance to an image in the cluster.
14550
14551   """
14552   HPATH = "instance-export"
14553   HTYPE = constants.HTYPE_INSTANCE
14554   REQ_BGL = False
14555
14556   def CheckArguments(self):
14557     """Check the arguments.
14558
14559     """
14560     self.x509_key_name = self.op.x509_key_name
14561     self.dest_x509_ca_pem = self.op.destination_x509_ca
14562
14563     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14564       if not self.x509_key_name:
14565         raise errors.OpPrereqError("Missing X509 key name for encryption",
14566                                    errors.ECODE_INVAL)
14567
14568       if not self.dest_x509_ca_pem:
14569         raise errors.OpPrereqError("Missing destination X509 CA",
14570                                    errors.ECODE_INVAL)
14571
14572   def ExpandNames(self):
14573     self._ExpandAndLockInstance()
14574
14575     # Lock all nodes for local exports
14576     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14577       # FIXME: lock only instance primary and destination node
14578       #
14579       # Sad but true, for now we have do lock all nodes, as we don't know where
14580       # the previous export might be, and in this LU we search for it and
14581       # remove it from its current node. In the future we could fix this by:
14582       #  - making a tasklet to search (share-lock all), then create the
14583       #    new one, then one to remove, after
14584       #  - removing the removal operation altogether
14585       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14586
14587       # Allocations should be stopped while this LU runs with node locks, but
14588       # it doesn't have to be exclusive
14589       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14590       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14591
14592   def DeclareLocks(self, level):
14593     """Last minute lock declaration."""
14594     # All nodes are locked anyway, so nothing to do here.
14595
14596   def BuildHooksEnv(self):
14597     """Build hooks env.
14598
14599     This will run on the master, primary node and target node.
14600
14601     """
14602     env = {
14603       "EXPORT_MODE": self.op.mode,
14604       "EXPORT_NODE": self.op.target_node,
14605       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14606       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14607       # TODO: Generic function for boolean env variables
14608       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14609       }
14610
14611     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14612
14613     return env
14614
14615   def BuildHooksNodes(self):
14616     """Build hooks nodes.
14617
14618     """
14619     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14620
14621     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14622       nl.append(self.op.target_node)
14623
14624     return (nl, nl)
14625
14626   def CheckPrereq(self):
14627     """Check prerequisites.
14628
14629     This checks that the instance and node names are valid.
14630
14631     """
14632     instance_name = self.op.instance_name
14633
14634     self.instance = self.cfg.GetInstanceInfo(instance_name)
14635     assert self.instance is not None, \
14636           "Cannot retrieve locked instance %s" % self.op.instance_name
14637     _CheckNodeOnline(self, self.instance.primary_node)
14638
14639     if (self.op.remove_instance and
14640         self.instance.admin_state == constants.ADMINST_UP and
14641         not self.op.shutdown):
14642       raise errors.OpPrereqError("Can not remove instance without shutting it"
14643                                  " down before", errors.ECODE_STATE)
14644
14645     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14646       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14647       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14648       assert self.dst_node is not None
14649
14650       _CheckNodeOnline(self, self.dst_node.name)
14651       _CheckNodeNotDrained(self, self.dst_node.name)
14652
14653       self._cds = None
14654       self.dest_disk_info = None
14655       self.dest_x509_ca = None
14656
14657     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14658       self.dst_node = None
14659
14660       if len(self.op.target_node) != len(self.instance.disks):
14661         raise errors.OpPrereqError(("Received destination information for %s"
14662                                     " disks, but instance %s has %s disks") %
14663                                    (len(self.op.target_node), instance_name,
14664                                     len(self.instance.disks)),
14665                                    errors.ECODE_INVAL)
14666
14667       cds = _GetClusterDomainSecret()
14668
14669       # Check X509 key name
14670       try:
14671         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14672       except (TypeError, ValueError), err:
14673         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14674                                    errors.ECODE_INVAL)
14675
14676       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14677         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14678                                    errors.ECODE_INVAL)
14679
14680       # Load and verify CA
14681       try:
14682         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14683       except OpenSSL.crypto.Error, err:
14684         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14685                                    (err, ), errors.ECODE_INVAL)
14686
14687       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14688       if errcode is not None:
14689         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14690                                    (msg, ), errors.ECODE_INVAL)
14691
14692       self.dest_x509_ca = cert
14693
14694       # Verify target information
14695       disk_info = []
14696       for idx, disk_data in enumerate(self.op.target_node):
14697         try:
14698           (host, port, magic) = \
14699             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14700         except errors.GenericError, err:
14701           raise errors.OpPrereqError("Target info for disk %s: %s" %
14702                                      (idx, err), errors.ECODE_INVAL)
14703
14704         disk_info.append((host, port, magic))
14705
14706       assert len(disk_info) == len(self.op.target_node)
14707       self.dest_disk_info = disk_info
14708
14709     else:
14710       raise errors.ProgrammerError("Unhandled export mode %r" %
14711                                    self.op.mode)
14712
14713     # instance disk type verification
14714     # TODO: Implement export support for file-based disks
14715     for disk in self.instance.disks:
14716       if disk.dev_type == constants.LD_FILE:
14717         raise errors.OpPrereqError("Export not supported for instances with"
14718                                    " file-based disks", errors.ECODE_INVAL)
14719
14720   def _CleanupExports(self, feedback_fn):
14721     """Removes exports of current instance from all other nodes.
14722
14723     If an instance in a cluster with nodes A..D was exported to node C, its
14724     exports will be removed from the nodes A, B and D.
14725
14726     """
14727     assert self.op.mode != constants.EXPORT_MODE_REMOTE
14728
14729     nodelist = self.cfg.GetNodeList()
14730     nodelist.remove(self.dst_node.name)
14731
14732     # on one-node clusters nodelist will be empty after the removal
14733     # if we proceed the backup would be removed because OpBackupQuery
14734     # substitutes an empty list with the full cluster node list.
14735     iname = self.instance.name
14736     if nodelist:
14737       feedback_fn("Removing old exports for instance %s" % iname)
14738       exportlist = self.rpc.call_export_list(nodelist)
14739       for node in exportlist:
14740         if exportlist[node].fail_msg:
14741           continue
14742         if iname in exportlist[node].payload:
14743           msg = self.rpc.call_export_remove(node, iname).fail_msg
14744           if msg:
14745             self.LogWarning("Could not remove older export for instance %s"
14746                             " on node %s: %s", iname, node, msg)
14747
14748   def Exec(self, feedback_fn):
14749     """Export an instance to an image in the cluster.
14750
14751     """
14752     assert self.op.mode in constants.EXPORT_MODES
14753
14754     instance = self.instance
14755     src_node = instance.primary_node
14756
14757     if self.op.shutdown:
14758       # shutdown the instance, but not the disks
14759       feedback_fn("Shutting down instance %s" % instance.name)
14760       result = self.rpc.call_instance_shutdown(src_node, instance,
14761                                                self.op.shutdown_timeout)
14762       # TODO: Maybe ignore failures if ignore_remove_failures is set
14763       result.Raise("Could not shutdown instance %s on"
14764                    " node %s" % (instance.name, src_node))
14765
14766     # set the disks ID correctly since call_instance_start needs the
14767     # correct drbd minor to create the symlinks
14768     for disk in instance.disks:
14769       self.cfg.SetDiskID(disk, src_node)
14770
14771     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14772
14773     if activate_disks:
14774       # Activate the instance disks if we'exporting a stopped instance
14775       feedback_fn("Activating disks for %s" % instance.name)
14776       _StartInstanceDisks(self, instance, None)
14777
14778     try:
14779       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14780                                                      instance)
14781
14782       helper.CreateSnapshots()
14783       try:
14784         if (self.op.shutdown and
14785             instance.admin_state == constants.ADMINST_UP and
14786             not self.op.remove_instance):
14787           assert not activate_disks
14788           feedback_fn("Starting instance %s" % instance.name)
14789           result = self.rpc.call_instance_start(src_node,
14790                                                 (instance, None, None), False)
14791           msg = result.fail_msg
14792           if msg:
14793             feedback_fn("Failed to start instance: %s" % msg)
14794             _ShutdownInstanceDisks(self, instance)
14795             raise errors.OpExecError("Could not start instance: %s" % msg)
14796
14797         if self.op.mode == constants.EXPORT_MODE_LOCAL:
14798           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14799         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14800           connect_timeout = constants.RIE_CONNECT_TIMEOUT
14801           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14802
14803           (key_name, _, _) = self.x509_key_name
14804
14805           dest_ca_pem = \
14806             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14807                                             self.dest_x509_ca)
14808
14809           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14810                                                      key_name, dest_ca_pem,
14811                                                      timeouts)
14812       finally:
14813         helper.Cleanup()
14814
14815       # Check for backwards compatibility
14816       assert len(dresults) == len(instance.disks)
14817       assert compat.all(isinstance(i, bool) for i in dresults), \
14818              "Not all results are boolean: %r" % dresults
14819
14820     finally:
14821       if activate_disks:
14822         feedback_fn("Deactivating disks for %s" % instance.name)
14823         _ShutdownInstanceDisks(self, instance)
14824
14825     if not (compat.all(dresults) and fin_resu):
14826       failures = []
14827       if not fin_resu:
14828         failures.append("export finalization")
14829       if not compat.all(dresults):
14830         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14831                                if not dsk)
14832         failures.append("disk export: disk(s) %s" % fdsk)
14833
14834       raise errors.OpExecError("Export failed, errors in %s" %
14835                                utils.CommaJoin(failures))
14836
14837     # At this point, the export was successful, we can cleanup/finish
14838
14839     # Remove instance if requested
14840     if self.op.remove_instance:
14841       feedback_fn("Removing instance %s" % instance.name)
14842       _RemoveInstance(self, feedback_fn, instance,
14843                       self.op.ignore_remove_failures)
14844
14845     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14846       self._CleanupExports(feedback_fn)
14847
14848     return fin_resu, dresults
14849
14850
14851 class LUBackupRemove(NoHooksLU):
14852   """Remove exports related to the named instance.
14853
14854   """
14855   REQ_BGL = False
14856
14857   def ExpandNames(self):
14858     self.needed_locks = {
14859       # We need all nodes to be locked in order for RemoveExport to work, but
14860       # we don't need to lock the instance itself, as nothing will happen to it
14861       # (and we can remove exports also for a removed instance)
14862       locking.LEVEL_NODE: locking.ALL_SET,
14863
14864       # Removing backups is quick, so blocking allocations is justified
14865       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14866       }
14867
14868     # Allocations should be stopped while this LU runs with node locks, but it
14869     # doesn't have to be exclusive
14870     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14871
14872   def Exec(self, feedback_fn):
14873     """Remove any export.
14874
14875     """
14876     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14877     # If the instance was not found we'll try with the name that was passed in.
14878     # This will only work if it was an FQDN, though.
14879     fqdn_warn = False
14880     if not instance_name:
14881       fqdn_warn = True
14882       instance_name = self.op.instance_name
14883
14884     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14885     exportlist = self.rpc.call_export_list(locked_nodes)
14886     found = False
14887     for node in exportlist:
14888       msg = exportlist[node].fail_msg
14889       if msg:
14890         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14891         continue
14892       if instance_name in exportlist[node].payload:
14893         found = True
14894         result = self.rpc.call_export_remove(node, instance_name)
14895         msg = result.fail_msg
14896         if msg:
14897           logging.error("Could not remove export for instance %s"
14898                         " on node %s: %s", instance_name, node, msg)
14899
14900     if fqdn_warn and not found:
14901       feedback_fn("Export not found. If trying to remove an export belonging"
14902                   " to a deleted instance please use its Fully Qualified"
14903                   " Domain Name.")
14904
14905
14906 class LUGroupAdd(LogicalUnit):
14907   """Logical unit for creating node groups.
14908
14909   """
14910   HPATH = "group-add"
14911   HTYPE = constants.HTYPE_GROUP
14912   REQ_BGL = False
14913
14914   def ExpandNames(self):
14915     # We need the new group's UUID here so that we can create and acquire the
14916     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14917     # that it should not check whether the UUID exists in the configuration.
14918     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14919     self.needed_locks = {}
14920     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14921
14922   def CheckPrereq(self):
14923     """Check prerequisites.
14924
14925     This checks that the given group name is not an existing node group
14926     already.
14927
14928     """
14929     try:
14930       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14931     except errors.OpPrereqError:
14932       pass
14933     else:
14934       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14935                                  " node group (UUID: %s)" %
14936                                  (self.op.group_name, existing_uuid),
14937                                  errors.ECODE_EXISTS)
14938
14939     if self.op.ndparams:
14940       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14941
14942     if self.op.hv_state:
14943       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14944     else:
14945       self.new_hv_state = None
14946
14947     if self.op.disk_state:
14948       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14949     else:
14950       self.new_disk_state = None
14951
14952     if self.op.diskparams:
14953       for templ in constants.DISK_TEMPLATES:
14954         if templ in self.op.diskparams:
14955           utils.ForceDictType(self.op.diskparams[templ],
14956                               constants.DISK_DT_TYPES)
14957       self.new_diskparams = self.op.diskparams
14958       try:
14959         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14960       except errors.OpPrereqError, err:
14961         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14962                                    errors.ECODE_INVAL)
14963     else:
14964       self.new_diskparams = {}
14965
14966     if self.op.ipolicy:
14967       cluster = self.cfg.GetClusterInfo()
14968       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14969       try:
14970         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14971       except errors.ConfigurationError, err:
14972         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14973                                    errors.ECODE_INVAL)
14974
14975   def BuildHooksEnv(self):
14976     """Build hooks env.
14977
14978     """
14979     return {
14980       "GROUP_NAME": self.op.group_name,
14981       }
14982
14983   def BuildHooksNodes(self):
14984     """Build hooks nodes.
14985
14986     """
14987     mn = self.cfg.GetMasterNode()
14988     return ([mn], [mn])
14989
14990   def Exec(self, feedback_fn):
14991     """Add the node group to the cluster.
14992
14993     """
14994     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14995                                   uuid=self.group_uuid,
14996                                   alloc_policy=self.op.alloc_policy,
14997                                   ndparams=self.op.ndparams,
14998                                   diskparams=self.new_diskparams,
14999                                   ipolicy=self.op.ipolicy,
15000                                   hv_state_static=self.new_hv_state,
15001                                   disk_state_static=self.new_disk_state)
15002
15003     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15004     del self.remove_locks[locking.LEVEL_NODEGROUP]
15005
15006
15007 class LUGroupAssignNodes(NoHooksLU):
15008   """Logical unit for assigning nodes to groups.
15009
15010   """
15011   REQ_BGL = False
15012
15013   def ExpandNames(self):
15014     # These raise errors.OpPrereqError on their own:
15015     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15016     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15017
15018     # We want to lock all the affected nodes and groups. We have readily
15019     # available the list of nodes, and the *destination* group. To gather the
15020     # list of "source" groups, we need to fetch node information later on.
15021     self.needed_locks = {
15022       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15023       locking.LEVEL_NODE: self.op.nodes,
15024       }
15025
15026   def DeclareLocks(self, level):
15027     if level == locking.LEVEL_NODEGROUP:
15028       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15029
15030       # Try to get all affected nodes' groups without having the group or node
15031       # lock yet. Needs verification later in the code flow.
15032       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15033
15034       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15035
15036   def CheckPrereq(self):
15037     """Check prerequisites.
15038
15039     """
15040     assert self.needed_locks[locking.LEVEL_NODEGROUP]
15041     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15042             frozenset(self.op.nodes))
15043
15044     expected_locks = (set([self.group_uuid]) |
15045                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15046     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15047     if actual_locks != expected_locks:
15048       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15049                                " current groups are '%s', used to be '%s'" %
15050                                (utils.CommaJoin(expected_locks),
15051                                 utils.CommaJoin(actual_locks)))
15052
15053     self.node_data = self.cfg.GetAllNodesInfo()
15054     self.group = self.cfg.GetNodeGroup(self.group_uuid)
15055     instance_data = self.cfg.GetAllInstancesInfo()
15056
15057     if self.group is None:
15058       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15059                                (self.op.group_name, self.group_uuid))
15060
15061     (new_splits, previous_splits) = \
15062       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15063                                              for node in self.op.nodes],
15064                                             self.node_data, instance_data)
15065
15066     if new_splits:
15067       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15068
15069       if not self.op.force:
15070         raise errors.OpExecError("The following instances get split by this"
15071                                  " change and --force was not given: %s" %
15072                                  fmt_new_splits)
15073       else:
15074         self.LogWarning("This operation will split the following instances: %s",
15075                         fmt_new_splits)
15076
15077         if previous_splits:
15078           self.LogWarning("In addition, these already-split instances continue"
15079                           " to be split across groups: %s",
15080                           utils.CommaJoin(utils.NiceSort(previous_splits)))
15081
15082   def Exec(self, feedback_fn):
15083     """Assign nodes to a new group.
15084
15085     """
15086     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15087
15088     self.cfg.AssignGroupNodes(mods)
15089
15090   @staticmethod
15091   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15092     """Check for split instances after a node assignment.
15093
15094     This method considers a series of node assignments as an atomic operation,
15095     and returns information about split instances after applying the set of
15096     changes.
15097
15098     In particular, it returns information about newly split instances, and
15099     instances that were already split, and remain so after the change.
15100
15101     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15102     considered.
15103
15104     @type changes: list of (node_name, new_group_uuid) pairs.
15105     @param changes: list of node assignments to consider.
15106     @param node_data: a dict with data for all nodes
15107     @param instance_data: a dict with all instances to consider
15108     @rtype: a two-tuple
15109     @return: a list of instances that were previously okay and result split as a
15110       consequence of this change, and a list of instances that were previously
15111       split and this change does not fix.
15112
15113     """
15114     changed_nodes = dict((node, group) for node, group in changes
15115                          if node_data[node].group != group)
15116
15117     all_split_instances = set()
15118     previously_split_instances = set()
15119
15120     def InstanceNodes(instance):
15121       return [instance.primary_node] + list(instance.secondary_nodes)
15122
15123     for inst in instance_data.values():
15124       if inst.disk_template not in constants.DTS_INT_MIRROR:
15125         continue
15126
15127       instance_nodes = InstanceNodes(inst)
15128
15129       if len(set(node_data[node].group for node in instance_nodes)) > 1:
15130         previously_split_instances.add(inst.name)
15131
15132       if len(set(changed_nodes.get(node, node_data[node].group)
15133                  for node in instance_nodes)) > 1:
15134         all_split_instances.add(inst.name)
15135
15136     return (list(all_split_instances - previously_split_instances),
15137             list(previously_split_instances & all_split_instances))
15138
15139
15140 class _GroupQuery(_QueryBase):
15141   FIELDS = query.GROUP_FIELDS
15142
15143   def ExpandNames(self, lu):
15144     lu.needed_locks = {}
15145
15146     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15147     self._cluster = lu.cfg.GetClusterInfo()
15148     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15149
15150     if not self.names:
15151       self.wanted = [name_to_uuid[name]
15152                      for name in utils.NiceSort(name_to_uuid.keys())]
15153     else:
15154       # Accept names to be either names or UUIDs.
15155       missing = []
15156       self.wanted = []
15157       all_uuid = frozenset(self._all_groups.keys())
15158
15159       for name in self.names:
15160         if name in all_uuid:
15161           self.wanted.append(name)
15162         elif name in name_to_uuid:
15163           self.wanted.append(name_to_uuid[name])
15164         else:
15165           missing.append(name)
15166
15167       if missing:
15168         raise errors.OpPrereqError("Some groups do not exist: %s" %
15169                                    utils.CommaJoin(missing),
15170                                    errors.ECODE_NOENT)
15171
15172   def DeclareLocks(self, lu, level):
15173     pass
15174
15175   def _GetQueryData(self, lu):
15176     """Computes the list of node groups and their attributes.
15177
15178     """
15179     do_nodes = query.GQ_NODE in self.requested_data
15180     do_instances = query.GQ_INST in self.requested_data
15181
15182     group_to_nodes = None
15183     group_to_instances = None
15184
15185     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15186     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15187     # latter GetAllInstancesInfo() is not enough, for we have to go through
15188     # instance->node. Hence, we will need to process nodes even if we only need
15189     # instance information.
15190     if do_nodes or do_instances:
15191       all_nodes = lu.cfg.GetAllNodesInfo()
15192       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15193       node_to_group = {}
15194
15195       for node in all_nodes.values():
15196         if node.group in group_to_nodes:
15197           group_to_nodes[node.group].append(node.name)
15198           node_to_group[node.name] = node.group
15199
15200       if do_instances:
15201         all_instances = lu.cfg.GetAllInstancesInfo()
15202         group_to_instances = dict((uuid, []) for uuid in self.wanted)
15203
15204         for instance in all_instances.values():
15205           node = instance.primary_node
15206           if node in node_to_group:
15207             group_to_instances[node_to_group[node]].append(instance.name)
15208
15209         if not do_nodes:
15210           # Do not pass on node information if it was not requested.
15211           group_to_nodes = None
15212
15213     return query.GroupQueryData(self._cluster,
15214                                 [self._all_groups[uuid]
15215                                  for uuid in self.wanted],
15216                                 group_to_nodes, group_to_instances,
15217                                 query.GQ_DISKPARAMS in self.requested_data)
15218
15219
15220 class LUGroupQuery(NoHooksLU):
15221   """Logical unit for querying node groups.
15222
15223   """
15224   REQ_BGL = False
15225
15226   def CheckArguments(self):
15227     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15228                           self.op.output_fields, False)
15229
15230   def ExpandNames(self):
15231     self.gq.ExpandNames(self)
15232
15233   def DeclareLocks(self, level):
15234     self.gq.DeclareLocks(self, level)
15235
15236   def Exec(self, feedback_fn):
15237     return self.gq.OldStyleQuery(self)
15238
15239
15240 class LUGroupSetParams(LogicalUnit):
15241   """Modifies the parameters of a node group.
15242
15243   """
15244   HPATH = "group-modify"
15245   HTYPE = constants.HTYPE_GROUP
15246   REQ_BGL = False
15247
15248   def CheckArguments(self):
15249     all_changes = [
15250       self.op.ndparams,
15251       self.op.diskparams,
15252       self.op.alloc_policy,
15253       self.op.hv_state,
15254       self.op.disk_state,
15255       self.op.ipolicy,
15256       ]
15257
15258     if all_changes.count(None) == len(all_changes):
15259       raise errors.OpPrereqError("Please pass at least one modification",
15260                                  errors.ECODE_INVAL)
15261
15262   def ExpandNames(self):
15263     # This raises errors.OpPrereqError on its own:
15264     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15265
15266     self.needed_locks = {
15267       locking.LEVEL_INSTANCE: [],
15268       locking.LEVEL_NODEGROUP: [self.group_uuid],
15269       }
15270
15271     self.share_locks[locking.LEVEL_INSTANCE] = 1
15272
15273   def DeclareLocks(self, level):
15274     if level == locking.LEVEL_INSTANCE:
15275       assert not self.needed_locks[locking.LEVEL_INSTANCE]
15276
15277       # Lock instances optimistically, needs verification once group lock has
15278       # been acquired
15279       self.needed_locks[locking.LEVEL_INSTANCE] = \
15280           self.cfg.GetNodeGroupInstances(self.group_uuid)
15281
15282   @staticmethod
15283   def _UpdateAndVerifyDiskParams(old, new):
15284     """Updates and verifies disk parameters.
15285
15286     """
15287     new_params = _GetUpdatedParams(old, new)
15288     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15289     return new_params
15290
15291   def CheckPrereq(self):
15292     """Check prerequisites.
15293
15294     """
15295     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15296
15297     # Check if locked instances are still correct
15298     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15299
15300     self.group = self.cfg.GetNodeGroup(self.group_uuid)
15301     cluster = self.cfg.GetClusterInfo()
15302
15303     if self.group is None:
15304       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15305                                (self.op.group_name, self.group_uuid))
15306
15307     if self.op.ndparams:
15308       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15309       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15310       self.new_ndparams = new_ndparams
15311
15312     if self.op.diskparams:
15313       diskparams = self.group.diskparams
15314       uavdp = self._UpdateAndVerifyDiskParams
15315       # For each disktemplate subdict update and verify the values
15316       new_diskparams = dict((dt,
15317                              uavdp(diskparams.get(dt, {}),
15318                                    self.op.diskparams[dt]))
15319                             for dt in constants.DISK_TEMPLATES
15320                             if dt in self.op.diskparams)
15321       # As we've all subdicts of diskparams ready, lets merge the actual
15322       # dict with all updated subdicts
15323       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15324       try:
15325         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15326       except errors.OpPrereqError, err:
15327         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15328                                    errors.ECODE_INVAL)
15329
15330     if self.op.hv_state:
15331       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15332                                                  self.group.hv_state_static)
15333
15334     if self.op.disk_state:
15335       self.new_disk_state = \
15336         _MergeAndVerifyDiskState(self.op.disk_state,
15337                                  self.group.disk_state_static)
15338
15339     if self.op.ipolicy:
15340       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15341                                             self.op.ipolicy,
15342                                             group_policy=True)
15343
15344       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15345       inst_filter = lambda inst: inst.name in owned_instances
15346       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15347       gmi = ganeti.masterd.instance
15348       violations = \
15349           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15350                                                                   self.group),
15351                                         new_ipolicy, instances)
15352
15353       if violations:
15354         self.LogWarning("After the ipolicy change the following instances"
15355                         " violate them: %s",
15356                         utils.CommaJoin(violations))
15357
15358   def BuildHooksEnv(self):
15359     """Build hooks env.
15360
15361     """
15362     return {
15363       "GROUP_NAME": self.op.group_name,
15364       "NEW_ALLOC_POLICY": self.op.alloc_policy,
15365       }
15366
15367   def BuildHooksNodes(self):
15368     """Build hooks nodes.
15369
15370     """
15371     mn = self.cfg.GetMasterNode()
15372     return ([mn], [mn])
15373
15374   def Exec(self, feedback_fn):
15375     """Modifies the node group.
15376
15377     """
15378     result = []
15379
15380     if self.op.ndparams:
15381       self.group.ndparams = self.new_ndparams
15382       result.append(("ndparams", str(self.group.ndparams)))
15383
15384     if self.op.diskparams:
15385       self.group.diskparams = self.new_diskparams
15386       result.append(("diskparams", str(self.group.diskparams)))
15387
15388     if self.op.alloc_policy:
15389       self.group.alloc_policy = self.op.alloc_policy
15390
15391     if self.op.hv_state:
15392       self.group.hv_state_static = self.new_hv_state
15393
15394     if self.op.disk_state:
15395       self.group.disk_state_static = self.new_disk_state
15396
15397     if self.op.ipolicy:
15398       self.group.ipolicy = self.new_ipolicy
15399
15400     self.cfg.Update(self.group, feedback_fn)
15401     return result
15402
15403
15404 class LUGroupRemove(LogicalUnit):
15405   HPATH = "group-remove"
15406   HTYPE = constants.HTYPE_GROUP
15407   REQ_BGL = False
15408
15409   def ExpandNames(self):
15410     # This will raises errors.OpPrereqError on its own:
15411     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15412     self.needed_locks = {
15413       locking.LEVEL_NODEGROUP: [self.group_uuid],
15414       }
15415
15416   def CheckPrereq(self):
15417     """Check prerequisites.
15418
15419     This checks that the given group name exists as a node group, that is
15420     empty (i.e., contains no nodes), and that is not the last group of the
15421     cluster.
15422
15423     """
15424     # Verify that the group is empty.
15425     group_nodes = [node.name
15426                    for node in self.cfg.GetAllNodesInfo().values()
15427                    if node.group == self.group_uuid]
15428
15429     if group_nodes:
15430       raise errors.OpPrereqError("Group '%s' not empty, has the following"
15431                                  " nodes: %s" %
15432                                  (self.op.group_name,
15433                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
15434                                  errors.ECODE_STATE)
15435
15436     # Verify the cluster would not be left group-less.
15437     if len(self.cfg.GetNodeGroupList()) == 1:
15438       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15439                                  " removed" % self.op.group_name,
15440                                  errors.ECODE_STATE)
15441
15442   def BuildHooksEnv(self):
15443     """Build hooks env.
15444
15445     """
15446     return {
15447       "GROUP_NAME": self.op.group_name,
15448       }
15449
15450   def BuildHooksNodes(self):
15451     """Build hooks nodes.
15452
15453     """
15454     mn = self.cfg.GetMasterNode()
15455     return ([mn], [mn])
15456
15457   def Exec(self, feedback_fn):
15458     """Remove the node group.
15459
15460     """
15461     try:
15462       self.cfg.RemoveNodeGroup(self.group_uuid)
15463     except errors.ConfigurationError:
15464       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15465                                (self.op.group_name, self.group_uuid))
15466
15467     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15468
15469
15470 class LUGroupRename(LogicalUnit):
15471   HPATH = "group-rename"
15472   HTYPE = constants.HTYPE_GROUP
15473   REQ_BGL = False
15474
15475   def ExpandNames(self):
15476     # This raises errors.OpPrereqError on its own:
15477     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15478
15479     self.needed_locks = {
15480       locking.LEVEL_NODEGROUP: [self.group_uuid],
15481       }
15482
15483   def CheckPrereq(self):
15484     """Check prerequisites.
15485
15486     Ensures requested new name is not yet used.
15487
15488     """
15489     try:
15490       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15491     except errors.OpPrereqError:
15492       pass
15493     else:
15494       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15495                                  " node group (UUID: %s)" %
15496                                  (self.op.new_name, new_name_uuid),
15497                                  errors.ECODE_EXISTS)
15498
15499   def BuildHooksEnv(self):
15500     """Build hooks env.
15501
15502     """
15503     return {
15504       "OLD_NAME": self.op.group_name,
15505       "NEW_NAME": self.op.new_name,
15506       }
15507
15508   def BuildHooksNodes(self):
15509     """Build hooks nodes.
15510
15511     """
15512     mn = self.cfg.GetMasterNode()
15513
15514     all_nodes = self.cfg.GetAllNodesInfo()
15515     all_nodes.pop(mn, None)
15516
15517     run_nodes = [mn]
15518     run_nodes.extend(node.name for node in all_nodes.values()
15519                      if node.group == self.group_uuid)
15520
15521     return (run_nodes, run_nodes)
15522
15523   def Exec(self, feedback_fn):
15524     """Rename the node group.
15525
15526     """
15527     group = self.cfg.GetNodeGroup(self.group_uuid)
15528
15529     if group is None:
15530       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15531                                (self.op.group_name, self.group_uuid))
15532
15533     group.name = self.op.new_name
15534     self.cfg.Update(group, feedback_fn)
15535
15536     return self.op.new_name
15537
15538
15539 class LUGroupEvacuate(LogicalUnit):
15540   HPATH = "group-evacuate"
15541   HTYPE = constants.HTYPE_GROUP
15542   REQ_BGL = False
15543
15544   def ExpandNames(self):
15545     # This raises errors.OpPrereqError on its own:
15546     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15547
15548     if self.op.target_groups:
15549       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15550                                   self.op.target_groups)
15551     else:
15552       self.req_target_uuids = []
15553
15554     if self.group_uuid in self.req_target_uuids:
15555       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15556                                  " as a target group (targets are %s)" %
15557                                  (self.group_uuid,
15558                                   utils.CommaJoin(self.req_target_uuids)),
15559                                  errors.ECODE_INVAL)
15560
15561     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15562
15563     self.share_locks = _ShareAll()
15564     self.needed_locks = {
15565       locking.LEVEL_INSTANCE: [],
15566       locking.LEVEL_NODEGROUP: [],
15567       locking.LEVEL_NODE: [],
15568       }
15569
15570   def DeclareLocks(self, level):
15571     if level == locking.LEVEL_INSTANCE:
15572       assert not self.needed_locks[locking.LEVEL_INSTANCE]
15573
15574       # Lock instances optimistically, needs verification once node and group
15575       # locks have been acquired
15576       self.needed_locks[locking.LEVEL_INSTANCE] = \
15577         self.cfg.GetNodeGroupInstances(self.group_uuid)
15578
15579     elif level == locking.LEVEL_NODEGROUP:
15580       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15581
15582       if self.req_target_uuids:
15583         lock_groups = set([self.group_uuid] + self.req_target_uuids)
15584
15585         # Lock all groups used by instances optimistically; this requires going
15586         # via the node before it's locked, requiring verification later on
15587         lock_groups.update(group_uuid
15588                            for instance_name in
15589                              self.owned_locks(locking.LEVEL_INSTANCE)
15590                            for group_uuid in
15591                              self.cfg.GetInstanceNodeGroups(instance_name))
15592       else:
15593         # No target groups, need to lock all of them
15594         lock_groups = locking.ALL_SET
15595
15596       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15597
15598     elif level == locking.LEVEL_NODE:
15599       # This will only lock the nodes in the group to be evacuated which
15600       # contain actual instances
15601       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15602       self._LockInstancesNodes()
15603
15604       # Lock all nodes in group to be evacuated and target groups
15605       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15606       assert self.group_uuid in owned_groups
15607       member_nodes = [node_name
15608                       for group in owned_groups
15609                       for node_name in self.cfg.GetNodeGroup(group).members]
15610       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15611
15612   def CheckPrereq(self):
15613     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15614     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15615     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15616
15617     assert owned_groups.issuperset(self.req_target_uuids)
15618     assert self.group_uuid in owned_groups
15619
15620     # Check if locked instances are still correct
15621     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15622
15623     # Get instance information
15624     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15625
15626     # Check if node groups for locked instances are still correct
15627     _CheckInstancesNodeGroups(self.cfg, self.instances,
15628                               owned_groups, owned_nodes, self.group_uuid)
15629
15630     if self.req_target_uuids:
15631       # User requested specific target groups
15632       self.target_uuids = self.req_target_uuids
15633     else:
15634       # All groups except the one to be evacuated are potential targets
15635       self.target_uuids = [group_uuid for group_uuid in owned_groups
15636                            if group_uuid != self.group_uuid]
15637
15638       if not self.target_uuids:
15639         raise errors.OpPrereqError("There are no possible target groups",
15640                                    errors.ECODE_INVAL)
15641
15642   def BuildHooksEnv(self):
15643     """Build hooks env.
15644
15645     """
15646     return {
15647       "GROUP_NAME": self.op.group_name,
15648       "TARGET_GROUPS": " ".join(self.target_uuids),
15649       }
15650
15651   def BuildHooksNodes(self):
15652     """Build hooks nodes.
15653
15654     """
15655     mn = self.cfg.GetMasterNode()
15656
15657     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15658
15659     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15660
15661     return (run_nodes, run_nodes)
15662
15663   def Exec(self, feedback_fn):
15664     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15665
15666     assert self.group_uuid not in self.target_uuids
15667
15668     req = iallocator.IAReqGroupChange(instances=instances,
15669                                       target_groups=self.target_uuids)
15670     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15671
15672     ial.Run(self.op.iallocator)
15673
15674     if not ial.success:
15675       raise errors.OpPrereqError("Can't compute group evacuation using"
15676                                  " iallocator '%s': %s" %
15677                                  (self.op.iallocator, ial.info),
15678                                  errors.ECODE_NORES)
15679
15680     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15681
15682     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15683                  len(jobs), self.op.group_name)
15684
15685     return ResultWithJobs(jobs)
15686
15687
15688 class TagsLU(NoHooksLU): # pylint: disable=W0223
15689   """Generic tags LU.
15690
15691   This is an abstract class which is the parent of all the other tags LUs.
15692
15693   """
15694   def ExpandNames(self):
15695     self.group_uuid = None
15696     self.needed_locks = {}
15697
15698     if self.op.kind == constants.TAG_NODE:
15699       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15700       lock_level = locking.LEVEL_NODE
15701       lock_name = self.op.name
15702     elif self.op.kind == constants.TAG_INSTANCE:
15703       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15704       lock_level = locking.LEVEL_INSTANCE
15705       lock_name = self.op.name
15706     elif self.op.kind == constants.TAG_NODEGROUP:
15707       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15708       lock_level = locking.LEVEL_NODEGROUP
15709       lock_name = self.group_uuid
15710     elif self.op.kind == constants.TAG_NETWORK:
15711       self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15712       lock_level = locking.LEVEL_NETWORK
15713       lock_name = self.network_uuid
15714     else:
15715       lock_level = None
15716       lock_name = None
15717
15718     if lock_level and getattr(self.op, "use_locking", True):
15719       self.needed_locks[lock_level] = lock_name
15720
15721     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15722     # not possible to acquire the BGL based on opcode parameters)
15723
15724   def CheckPrereq(self):
15725     """Check prerequisites.
15726
15727     """
15728     if self.op.kind == constants.TAG_CLUSTER:
15729       self.target = self.cfg.GetClusterInfo()
15730     elif self.op.kind == constants.TAG_NODE:
15731       self.target = self.cfg.GetNodeInfo(self.op.name)
15732     elif self.op.kind == constants.TAG_INSTANCE:
15733       self.target = self.cfg.GetInstanceInfo(self.op.name)
15734     elif self.op.kind == constants.TAG_NODEGROUP:
15735       self.target = self.cfg.GetNodeGroup(self.group_uuid)
15736     elif self.op.kind == constants.TAG_NETWORK:
15737       self.target = self.cfg.GetNetwork(self.network_uuid)
15738     else:
15739       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15740                                  str(self.op.kind), errors.ECODE_INVAL)
15741
15742
15743 class LUTagsGet(TagsLU):
15744   """Returns the tags of a given object.
15745
15746   """
15747   REQ_BGL = False
15748
15749   def ExpandNames(self):
15750     TagsLU.ExpandNames(self)
15751
15752     # Share locks as this is only a read operation
15753     self.share_locks = _ShareAll()
15754
15755   def Exec(self, feedback_fn):
15756     """Returns the tag list.
15757
15758     """
15759     return list(self.target.GetTags())
15760
15761
15762 class LUTagsSearch(NoHooksLU):
15763   """Searches the tags for a given pattern.
15764
15765   """
15766   REQ_BGL = False
15767
15768   def ExpandNames(self):
15769     self.needed_locks = {}
15770
15771   def CheckPrereq(self):
15772     """Check prerequisites.
15773
15774     This checks the pattern passed for validity by compiling it.
15775
15776     """
15777     try:
15778       self.re = re.compile(self.op.pattern)
15779     except re.error, err:
15780       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15781                                  (self.op.pattern, err), errors.ECODE_INVAL)
15782
15783   def Exec(self, feedback_fn):
15784     """Returns the tag list.
15785
15786     """
15787     cfg = self.cfg
15788     tgts = [("/cluster", cfg.GetClusterInfo())]
15789     ilist = cfg.GetAllInstancesInfo().values()
15790     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15791     nlist = cfg.GetAllNodesInfo().values()
15792     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15793     tgts.extend(("/nodegroup/%s" % n.name, n)
15794                 for n in cfg.GetAllNodeGroupsInfo().values())
15795     results = []
15796     for path, target in tgts:
15797       for tag in target.GetTags():
15798         if self.re.search(tag):
15799           results.append((path, tag))
15800     return results
15801
15802
15803 class LUTagsSet(TagsLU):
15804   """Sets a tag on a given object.
15805
15806   """
15807   REQ_BGL = False
15808
15809   def CheckPrereq(self):
15810     """Check prerequisites.
15811
15812     This checks the type and length of the tag name and value.
15813
15814     """
15815     TagsLU.CheckPrereq(self)
15816     for tag in self.op.tags:
15817       objects.TaggableObject.ValidateTag(tag)
15818
15819   def Exec(self, feedback_fn):
15820     """Sets the tag.
15821
15822     """
15823     try:
15824       for tag in self.op.tags:
15825         self.target.AddTag(tag)
15826     except errors.TagError, err:
15827       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15828     self.cfg.Update(self.target, feedback_fn)
15829
15830
15831 class LUTagsDel(TagsLU):
15832   """Delete a list of tags from a given object.
15833
15834   """
15835   REQ_BGL = False
15836
15837   def CheckPrereq(self):
15838     """Check prerequisites.
15839
15840     This checks that we have the given tag.
15841
15842     """
15843     TagsLU.CheckPrereq(self)
15844     for tag in self.op.tags:
15845       objects.TaggableObject.ValidateTag(tag)
15846     del_tags = frozenset(self.op.tags)
15847     cur_tags = self.target.GetTags()
15848
15849     diff_tags = del_tags - cur_tags
15850     if diff_tags:
15851       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15852       raise errors.OpPrereqError("Tag(s) %s not found" %
15853                                  (utils.CommaJoin(diff_names), ),
15854                                  errors.ECODE_NOENT)
15855
15856   def Exec(self, feedback_fn):
15857     """Remove the tag from the object.
15858
15859     """
15860     for tag in self.op.tags:
15861       self.target.RemoveTag(tag)
15862     self.cfg.Update(self.target, feedback_fn)
15863
15864
15865 class LUTestDelay(NoHooksLU):
15866   """Sleep for a specified amount of time.
15867
15868   This LU sleeps on the master and/or nodes for a specified amount of
15869   time.
15870
15871   """
15872   REQ_BGL = False
15873
15874   def ExpandNames(self):
15875     """Expand names and set required locks.
15876
15877     This expands the node list, if any.
15878
15879     """
15880     self.needed_locks = {}
15881     if self.op.on_nodes:
15882       # _GetWantedNodes can be used here, but is not always appropriate to use
15883       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15884       # more information.
15885       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15886       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15887
15888   def _TestDelay(self):
15889     """Do the actual sleep.
15890
15891     """
15892     if self.op.on_master:
15893       if not utils.TestDelay(self.op.duration):
15894         raise errors.OpExecError("Error during master delay test")
15895     if self.op.on_nodes:
15896       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15897       for node, node_result in result.items():
15898         node_result.Raise("Failure during rpc call to node %s" % node)
15899
15900   def Exec(self, feedback_fn):
15901     """Execute the test delay opcode, with the wanted repetitions.
15902
15903     """
15904     if self.op.repeat == 0:
15905       self._TestDelay()
15906     else:
15907       top_value = self.op.repeat - 1
15908       for i in range(self.op.repeat):
15909         self.LogInfo("Test delay iteration %d/%d", i, top_value)
15910         self._TestDelay()
15911
15912
15913 class LURestrictedCommand(NoHooksLU):
15914   """Logical unit for executing restricted commands.
15915
15916   """
15917   REQ_BGL = False
15918
15919   def ExpandNames(self):
15920     if self.op.nodes:
15921       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15922
15923     self.needed_locks = {
15924       locking.LEVEL_NODE: self.op.nodes,
15925       }
15926     self.share_locks = {
15927       locking.LEVEL_NODE: not self.op.use_locking,
15928       }
15929
15930   def CheckPrereq(self):
15931     """Check prerequisites.
15932
15933     """
15934
15935   def Exec(self, feedback_fn):
15936     """Execute restricted command and return output.
15937
15938     """
15939     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15940
15941     # Check if correct locks are held
15942     assert set(self.op.nodes).issubset(owned_nodes)
15943
15944     rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15945
15946     result = []
15947
15948     for node_name in self.op.nodes:
15949       nres = rpcres[node_name]
15950       if nres.fail_msg:
15951         msg = ("Command '%s' on node '%s' failed: %s" %
15952                (self.op.command, node_name, nres.fail_msg))
15953         result.append((False, msg))
15954       else:
15955         result.append((True, nres.payload))
15956
15957     return result
15958
15959
15960 class LUTestJqueue(NoHooksLU):
15961   """Utility LU to test some aspects of the job queue.
15962
15963   """
15964   REQ_BGL = False
15965
15966   # Must be lower than default timeout for WaitForJobChange to see whether it
15967   # notices changed jobs
15968   _CLIENT_CONNECT_TIMEOUT = 20.0
15969   _CLIENT_CONFIRM_TIMEOUT = 60.0
15970
15971   @classmethod
15972   def _NotifyUsingSocket(cls, cb, errcls):
15973     """Opens a Unix socket and waits for another program to connect.
15974
15975     @type cb: callable
15976     @param cb: Callback to send socket name to client
15977     @type errcls: class
15978     @param errcls: Exception class to use for errors
15979
15980     """
15981     # Using a temporary directory as there's no easy way to create temporary
15982     # sockets without writing a custom loop around tempfile.mktemp and
15983     # socket.bind
15984     tmpdir = tempfile.mkdtemp()
15985     try:
15986       tmpsock = utils.PathJoin(tmpdir, "sock")
15987
15988       logging.debug("Creating temporary socket at %s", tmpsock)
15989       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15990       try:
15991         sock.bind(tmpsock)
15992         sock.listen(1)
15993
15994         # Send details to client
15995         cb(tmpsock)
15996
15997         # Wait for client to connect before continuing
15998         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15999         try:
16000           (conn, _) = sock.accept()
16001         except socket.error, err:
16002           raise errcls("Client didn't connect in time (%s)" % err)
16003       finally:
16004         sock.close()
16005     finally:
16006       # Remove as soon as client is connected
16007       shutil.rmtree(tmpdir)
16008
16009     # Wait for client to close
16010     try:
16011       try:
16012         # pylint: disable=E1101
16013         # Instance of '_socketobject' has no ... member
16014         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16015         conn.recv(1)
16016       except socket.error, err:
16017         raise errcls("Client failed to confirm notification (%s)" % err)
16018     finally:
16019       conn.close()
16020
16021   def _SendNotification(self, test, arg, sockname):
16022     """Sends a notification to the client.
16023
16024     @type test: string
16025     @param test: Test name
16026     @param arg: Test argument (depends on test)
16027     @type sockname: string
16028     @param sockname: Socket path
16029
16030     """
16031     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16032
16033   def _Notify(self, prereq, test, arg):
16034     """Notifies the client of a test.
16035
16036     @type prereq: bool
16037     @param prereq: Whether this is a prereq-phase test
16038     @type test: string
16039     @param test: Test name
16040     @param arg: Test argument (depends on test)
16041
16042     """
16043     if prereq:
16044       errcls = errors.OpPrereqError
16045     else:
16046       errcls = errors.OpExecError
16047
16048     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16049                                                   test, arg),
16050                                    errcls)
16051
16052   def CheckArguments(self):
16053     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16054     self.expandnames_calls = 0
16055
16056   def ExpandNames(self):
16057     checkargs_calls = getattr(self, "checkargs_calls", 0)
16058     if checkargs_calls < 1:
16059       raise errors.ProgrammerError("CheckArguments was not called")
16060
16061     self.expandnames_calls += 1
16062
16063     if self.op.notify_waitlock:
16064       self._Notify(True, constants.JQT_EXPANDNAMES, None)
16065
16066     self.LogInfo("Expanding names")
16067
16068     # Get lock on master node (just to get a lock, not for a particular reason)
16069     self.needed_locks = {
16070       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16071       }
16072
16073   def Exec(self, feedback_fn):
16074     if self.expandnames_calls < 1:
16075       raise errors.ProgrammerError("ExpandNames was not called")
16076
16077     if self.op.notify_exec:
16078       self._Notify(False, constants.JQT_EXEC, None)
16079
16080     self.LogInfo("Executing")
16081
16082     if self.op.log_messages:
16083       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16084       for idx, msg in enumerate(self.op.log_messages):
16085         self.LogInfo("Sending log message %s", idx + 1)
16086         feedback_fn(constants.JQT_MSGPREFIX + msg)
16087         # Report how many test messages have been sent
16088         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16089
16090     if self.op.fail:
16091       raise errors.OpExecError("Opcode failure was requested")
16092
16093     return True
16094
16095
16096 class LUTestAllocator(NoHooksLU):
16097   """Run allocator tests.
16098
16099   This LU runs the allocator tests
16100
16101   """
16102   def CheckPrereq(self):
16103     """Check prerequisites.
16104
16105     This checks the opcode parameters depending on the director and mode test.
16106
16107     """
16108     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16109                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
16110       for attr in ["memory", "disks", "disk_template",
16111                    "os", "tags", "nics", "vcpus"]:
16112         if not hasattr(self.op, attr):
16113           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16114                                      attr, errors.ECODE_INVAL)
16115       iname = self.cfg.ExpandInstanceName(self.op.name)
16116       if iname is not None:
16117         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16118                                    iname, errors.ECODE_EXISTS)
16119       if not isinstance(self.op.nics, list):
16120         raise errors.OpPrereqError("Invalid parameter 'nics'",
16121                                    errors.ECODE_INVAL)
16122       if not isinstance(self.op.disks, list):
16123         raise errors.OpPrereqError("Invalid parameter 'disks'",
16124                                    errors.ECODE_INVAL)
16125       for row in self.op.disks:
16126         if (not isinstance(row, dict) or
16127             constants.IDISK_SIZE not in row or
16128             not isinstance(row[constants.IDISK_SIZE], int) or
16129             constants.IDISK_MODE not in row or
16130             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16131           raise errors.OpPrereqError("Invalid contents of the 'disks'"
16132                                      " parameter", errors.ECODE_INVAL)
16133       if self.op.hypervisor is None:
16134         self.op.hypervisor = self.cfg.GetHypervisorType()
16135     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16136       fname = _ExpandInstanceName(self.cfg, self.op.name)
16137       self.op.name = fname
16138       self.relocate_from = \
16139           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16140     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16141                           constants.IALLOCATOR_MODE_NODE_EVAC):
16142       if not self.op.instances:
16143         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16144       self.op.instances = _GetWantedInstances(self, self.op.instances)
16145     else:
16146       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16147                                  self.op.mode, errors.ECODE_INVAL)
16148
16149     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16150       if self.op.iallocator is None:
16151         raise errors.OpPrereqError("Missing allocator name",
16152                                    errors.ECODE_INVAL)
16153     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16154       raise errors.OpPrereqError("Wrong allocator test '%s'" %
16155                                  self.op.direction, errors.ECODE_INVAL)
16156
16157   def Exec(self, feedback_fn):
16158     """Run the allocator test.
16159
16160     """
16161     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16162       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16163                                           memory=self.op.memory,
16164                                           disks=self.op.disks,
16165                                           disk_template=self.op.disk_template,
16166                                           os=self.op.os,
16167                                           tags=self.op.tags,
16168                                           nics=self.op.nics,
16169                                           vcpus=self.op.vcpus,
16170                                           spindle_use=self.op.spindle_use,
16171                                           hypervisor=self.op.hypervisor)
16172     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16173       req = iallocator.IAReqRelocate(name=self.op.name,
16174                                      relocate_from=list(self.relocate_from))
16175     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16176       req = iallocator.IAReqGroupChange(instances=self.op.instances,
16177                                         target_groups=self.op.target_groups)
16178     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16179       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16180                                      evac_mode=self.op.evac_mode)
16181     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16182       disk_template = self.op.disk_template
16183       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16184                                              memory=self.op.memory,
16185                                              disks=self.op.disks,
16186                                              disk_template=disk_template,
16187                                              os=self.op.os,
16188                                              tags=self.op.tags,
16189                                              nics=self.op.nics,
16190                                              vcpus=self.op.vcpus,
16191                                              spindle_use=self.op.spindle_use,
16192                                              hypervisor=self.op.hypervisor)
16193                for idx in range(self.op.count)]
16194       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16195     else:
16196       raise errors.ProgrammerError("Uncatched mode %s in"
16197                                    " LUTestAllocator.Exec", self.op.mode)
16198
16199     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16200     if self.op.direction == constants.IALLOCATOR_DIR_IN:
16201       result = ial.in_text
16202     else:
16203       ial.Run(self.op.iallocator, validate=False)
16204       result = ial.out_text
16205     return result
16206
16207
16208 class LUNetworkAdd(LogicalUnit):
16209   """Logical unit for creating networks.
16210
16211   """
16212   HPATH = "network-add"
16213   HTYPE = constants.HTYPE_NETWORK
16214   REQ_BGL = False
16215
16216   def BuildHooksNodes(self):
16217     """Build hooks nodes.
16218
16219     """
16220     mn = self.cfg.GetMasterNode()
16221     return ([mn], [mn])
16222
16223   def CheckArguments(self):
16224     if self.op.mac_prefix:
16225       self.op.mac_prefix = \
16226         utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16227
16228   def ExpandNames(self):
16229     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16230
16231     if self.op.conflicts_check:
16232       self.share_locks[locking.LEVEL_NODE] = 1
16233       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16234       self.needed_locks = {
16235         locking.LEVEL_NODE: locking.ALL_SET,
16236         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16237         }
16238     else:
16239       self.needed_locks = {}
16240
16241     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16242
16243   def CheckPrereq(self):
16244     if self.op.network is None:
16245       raise errors.OpPrereqError("Network must be given",
16246                                  errors.ECODE_INVAL)
16247
16248     try:
16249       existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16250     except errors.OpPrereqError:
16251       pass
16252     else:
16253       raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16254                                  " network (UUID: %s)" %
16255                                  (self.op.network_name, existing_uuid),
16256                                  errors.ECODE_EXISTS)
16257
16258     # Check tag validity
16259     for tag in self.op.tags:
16260       objects.TaggableObject.ValidateTag(tag)
16261
16262   def BuildHooksEnv(self):
16263     """Build hooks env.
16264
16265     """
16266     args = {
16267       "name": self.op.network_name,
16268       "subnet": self.op.network,
16269       "gateway": self.op.gateway,
16270       "network6": self.op.network6,
16271       "gateway6": self.op.gateway6,
16272       "mac_prefix": self.op.mac_prefix,
16273       "tags": self.op.tags,
16274       }
16275     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16276
16277   def Exec(self, feedback_fn):
16278     """Add the ip pool to the cluster.
16279
16280     """
16281     nobj = objects.Network(name=self.op.network_name,
16282                            network=self.op.network,
16283                            gateway=self.op.gateway,
16284                            network6=self.op.network6,
16285                            gateway6=self.op.gateway6,
16286                            mac_prefix=self.op.mac_prefix,
16287                            uuid=self.network_uuid)
16288     # Initialize the associated address pool
16289     try:
16290       pool = network.AddressPool.InitializeNetwork(nobj)
16291     except errors.AddressPoolError, err:
16292       raise errors.OpExecError("Cannot create IP address pool for network"
16293                                " '%s': %s" % (self.op.network_name, err))
16294
16295     # Check if we need to reserve the nodes and the cluster master IP
16296     # These may not be allocated to any instances in routed mode, as
16297     # they wouldn't function anyway.
16298     if self.op.conflicts_check:
16299       for node in self.cfg.GetAllNodesInfo().values():
16300         for ip in [node.primary_ip, node.secondary_ip]:
16301           try:
16302             if pool.Contains(ip):
16303               pool.Reserve(ip)
16304               self.LogInfo("Reserved IP address of node '%s' (%s)",
16305                            node.name, ip)
16306           except errors.AddressPoolError, err:
16307             self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16308                             ip, node.name, err)
16309
16310       master_ip = self.cfg.GetClusterInfo().master_ip
16311       try:
16312         if pool.Contains(master_ip):
16313           pool.Reserve(master_ip)
16314           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16315       except errors.AddressPoolError, err:
16316         self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16317                         master_ip, err)
16318
16319     if self.op.add_reserved_ips:
16320       for ip in self.op.add_reserved_ips:
16321         try:
16322           pool.Reserve(ip, external=True)
16323         except errors.AddressPoolError, err:
16324           raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16325                                    (ip, err))
16326
16327     if self.op.tags:
16328       for tag in self.op.tags:
16329         nobj.AddTag(tag)
16330
16331     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16332     del self.remove_locks[locking.LEVEL_NETWORK]
16333
16334
16335 class LUNetworkRemove(LogicalUnit):
16336   HPATH = "network-remove"
16337   HTYPE = constants.HTYPE_NETWORK
16338   REQ_BGL = False
16339
16340   def ExpandNames(self):
16341     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16342
16343     self.share_locks[locking.LEVEL_NODEGROUP] = 1
16344     self.needed_locks = {
16345       locking.LEVEL_NETWORK: [self.network_uuid],
16346       locking.LEVEL_NODEGROUP: locking.ALL_SET,
16347       }
16348
16349   def CheckPrereq(self):
16350     """Check prerequisites.
16351
16352     This checks that the given network name exists as a network, that is
16353     empty (i.e., contains no nodes), and that is not the last group of the
16354     cluster.
16355
16356     """
16357     # Verify that the network is not conncted.
16358     node_groups = [group.name
16359                    for group in self.cfg.GetAllNodeGroupsInfo().values()
16360                    if self.network_uuid in group.networks]
16361
16362     if node_groups:
16363       self.LogWarning("Network '%s' is connected to the following"
16364                       " node groups: %s" %
16365                       (self.op.network_name,
16366                        utils.CommaJoin(utils.NiceSort(node_groups))))
16367       raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16368
16369   def BuildHooksEnv(self):
16370     """Build hooks env.
16371
16372     """
16373     return {
16374       "NETWORK_NAME": self.op.network_name,
16375       }
16376
16377   def BuildHooksNodes(self):
16378     """Build hooks nodes.
16379
16380     """
16381     mn = self.cfg.GetMasterNode()
16382     return ([mn], [mn])
16383
16384   def Exec(self, feedback_fn):
16385     """Remove the network.
16386
16387     """
16388     try:
16389       self.cfg.RemoveNetwork(self.network_uuid)
16390     except errors.ConfigurationError:
16391       raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16392                                (self.op.network_name, self.network_uuid))
16393
16394
16395 class LUNetworkSetParams(LogicalUnit):
16396   """Modifies the parameters of a network.
16397
16398   """
16399   HPATH = "network-modify"
16400   HTYPE = constants.HTYPE_NETWORK
16401   REQ_BGL = False
16402
16403   def CheckArguments(self):
16404     if (self.op.gateway and
16405         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16406       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16407                                  " at once", errors.ECODE_INVAL)
16408
16409   def ExpandNames(self):
16410     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16411
16412     self.needed_locks = {
16413       locking.LEVEL_NETWORK: [self.network_uuid],
16414       }
16415
16416   def CheckPrereq(self):
16417     """Check prerequisites.
16418
16419     """
16420     self.network = self.cfg.GetNetwork(self.network_uuid)
16421     self.gateway = self.network.gateway
16422     self.mac_prefix = self.network.mac_prefix
16423     self.network6 = self.network.network6
16424     self.gateway6 = self.network.gateway6
16425     self.tags = self.network.tags
16426
16427     self.pool = network.AddressPool(self.network)
16428
16429     if self.op.gateway:
16430       if self.op.gateway == constants.VALUE_NONE:
16431         self.gateway = None
16432       else:
16433         self.gateway = self.op.gateway
16434         if self.pool.IsReserved(self.gateway):
16435           raise errors.OpPrereqError("Gateway IP address '%s' is already"
16436                                      " reserved" % self.gateway,
16437                                      errors.ECODE_STATE)
16438
16439     if self.op.mac_prefix:
16440       if self.op.mac_prefix == constants.VALUE_NONE:
16441         self.mac_prefix = None
16442       else:
16443         self.mac_prefix = \
16444           utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16445
16446     if self.op.gateway6:
16447       if self.op.gateway6 == constants.VALUE_NONE:
16448         self.gateway6 = None
16449       else:
16450         self.gateway6 = self.op.gateway6
16451
16452     if self.op.network6:
16453       if self.op.network6 == constants.VALUE_NONE:
16454         self.network6 = None
16455       else:
16456         self.network6 = self.op.network6
16457
16458   def BuildHooksEnv(self):
16459     """Build hooks env.
16460
16461     """
16462     args = {
16463       "name": self.op.network_name,
16464       "subnet": self.network.network,
16465       "gateway": self.gateway,
16466       "network6": self.network6,
16467       "gateway6": self.gateway6,
16468       "mac_prefix": self.mac_prefix,
16469       "tags": self.tags,
16470       }
16471     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16472
16473   def BuildHooksNodes(self):
16474     """Build hooks nodes.
16475
16476     """
16477     mn = self.cfg.GetMasterNode()
16478     return ([mn], [mn])
16479
16480   def Exec(self, feedback_fn):
16481     """Modifies the network.
16482
16483     """
16484     #TODO: reserve/release via temporary reservation manager
16485     #      extend cfg.ReserveIp/ReleaseIp with the external flag
16486     if self.op.gateway:
16487       if self.gateway == self.network.gateway:
16488         self.LogWarning("Gateway is already %s", self.gateway)
16489       else:
16490         if self.gateway:
16491           self.pool.Reserve(self.gateway, external=True)
16492         if self.network.gateway:
16493           self.pool.Release(self.network.gateway, external=True)
16494         self.network.gateway = self.gateway
16495
16496     if self.op.add_reserved_ips:
16497       for ip in self.op.add_reserved_ips:
16498         try:
16499           if self.pool.IsReserved(ip):
16500             self.LogWarning("IP address %s is already reserved", ip)
16501           else:
16502             self.pool.Reserve(ip, external=True)
16503         except errors.AddressPoolError, err:
16504           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16505
16506     if self.op.remove_reserved_ips:
16507       for ip in self.op.remove_reserved_ips:
16508         if ip == self.network.gateway:
16509           self.LogWarning("Cannot unreserve Gateway's IP")
16510           continue
16511         try:
16512           if not self.pool.IsReserved(ip):
16513             self.LogWarning("IP address %s is already unreserved", ip)
16514           else:
16515             self.pool.Release(ip, external=True)
16516         except errors.AddressPoolError, err:
16517           self.LogWarning("Cannot release IP address %s: %s", ip, err)
16518
16519     if self.op.mac_prefix:
16520       self.network.mac_prefix = self.mac_prefix
16521
16522     if self.op.network6:
16523       self.network.network6 = self.network6
16524
16525     if self.op.gateway6:
16526       self.network.gateway6 = self.gateway6
16527
16528     self.pool.Validate()
16529
16530     self.cfg.Update(self.network, feedback_fn)
16531
16532
16533 class _NetworkQuery(_QueryBase):
16534   FIELDS = query.NETWORK_FIELDS
16535
16536   def ExpandNames(self, lu):
16537     lu.needed_locks = {}
16538     lu.share_locks = _ShareAll()
16539
16540     self.do_locking = self.use_locking
16541
16542     all_networks = lu.cfg.GetAllNetworksInfo()
16543     name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16544
16545     if self.names:
16546       missing = []
16547       self.wanted = []
16548
16549       for name in self.names:
16550         if name in name_to_uuid:
16551           self.wanted.append(name_to_uuid[name])
16552         else:
16553           missing.append(name)
16554
16555       if missing:
16556         raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16557                                    errors.ECODE_NOENT)
16558     else:
16559       self.wanted = locking.ALL_SET
16560
16561     if self.do_locking:
16562       lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16563       if query.NETQ_INST in self.requested_data:
16564         lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16565       if query.NETQ_GROUP in self.requested_data:
16566         lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16567
16568   def DeclareLocks(self, lu, level):
16569     pass
16570
16571   def _GetQueryData(self, lu):
16572     """Computes the list of networks and their attributes.
16573
16574     """
16575     all_networks = lu.cfg.GetAllNetworksInfo()
16576
16577     network_uuids = self._GetNames(lu, all_networks.keys(),
16578                                    locking.LEVEL_NETWORK)
16579
16580     do_instances = query.NETQ_INST in self.requested_data
16581     do_groups = query.NETQ_GROUP in self.requested_data
16582
16583     network_to_instances = None
16584     network_to_groups = None
16585
16586     # For NETQ_GROUP, we need to map network->[groups]
16587     if do_groups:
16588       all_groups = lu.cfg.GetAllNodeGroupsInfo()
16589       network_to_groups = dict((uuid, []) for uuid in network_uuids)
16590       for _, group in all_groups.iteritems():
16591         for net_uuid in network_uuids:
16592           netparams = group.networks.get(net_uuid, None)
16593           if netparams:
16594             info = (group.name, netparams[constants.NIC_MODE],
16595                     netparams[constants.NIC_LINK])
16596
16597             network_to_groups[net_uuid].append(info)
16598
16599     if do_instances:
16600       all_instances = lu.cfg.GetAllInstancesInfo()
16601       network_to_instances = dict((uuid, []) for uuid in network_uuids)
16602       for instance in all_instances.values():
16603         for nic in instance.nics:
16604           if nic.network in network_uuids:
16605             network_to_instances[nic.network].append(instance.name)
16606             break
16607
16608     if query.NETQ_STATS in self.requested_data:
16609       stats = \
16610         dict((uuid,
16611               self._GetStats(network.AddressPool(all_networks[uuid])))
16612              for uuid in network_uuids)
16613     else:
16614       stats = None
16615
16616     return query.NetworkQueryData([all_networks[uuid]
16617                                    for uuid in network_uuids],
16618                                    network_to_groups,
16619                                    network_to_instances,
16620                                    stats)
16621
16622   @staticmethod
16623   def _GetStats(pool):
16624     """Returns statistics for a network address pool.
16625
16626     """
16627     return {
16628       "free_count": pool.GetFreeCount(),
16629       "reserved_count": pool.GetReservedCount(),
16630       "map": pool.GetMap(),
16631       "external_reservations":
16632         utils.CommaJoin(pool.GetExternalReservations()),
16633       }
16634
16635
16636 class LUNetworkQuery(NoHooksLU):
16637   """Logical unit for querying networks.
16638
16639   """
16640   REQ_BGL = False
16641
16642   def CheckArguments(self):
16643     self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16644                             self.op.output_fields, self.op.use_locking)
16645
16646   def ExpandNames(self):
16647     self.nq.ExpandNames(self)
16648
16649   def Exec(self, feedback_fn):
16650     return self.nq.OldStyleQuery(self)
16651
16652
16653 class LUNetworkConnect(LogicalUnit):
16654   """Connect a network to a nodegroup
16655
16656   """
16657   HPATH = "network-connect"
16658   HTYPE = constants.HTYPE_NETWORK
16659   REQ_BGL = False
16660
16661   def ExpandNames(self):
16662     self.network_name = self.op.network_name
16663     self.group_name = self.op.group_name
16664     self.network_mode = self.op.network_mode
16665     self.network_link = self.op.network_link
16666
16667     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16668     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16669
16670     self.needed_locks = {
16671       locking.LEVEL_INSTANCE: [],
16672       locking.LEVEL_NODEGROUP: [self.group_uuid],
16673       }
16674     self.share_locks[locking.LEVEL_INSTANCE] = 1
16675
16676     if self.op.conflicts_check:
16677       self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16678       self.share_locks[locking.LEVEL_NETWORK] = 1
16679
16680   def DeclareLocks(self, level):
16681     if level == locking.LEVEL_INSTANCE:
16682       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16683
16684       # Lock instances optimistically, needs verification once group lock has
16685       # been acquired
16686       if self.op.conflicts_check:
16687         self.needed_locks[locking.LEVEL_INSTANCE] = \
16688             self.cfg.GetNodeGroupInstances(self.group_uuid)
16689
16690   def BuildHooksEnv(self):
16691     ret = {
16692       "GROUP_NAME": self.group_name,
16693       "GROUP_NETWORK_MODE": self.network_mode,
16694       "GROUP_NETWORK_LINK": self.network_link,
16695       }
16696     return ret
16697
16698   def BuildHooksNodes(self):
16699     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16700     return (nodes, nodes)
16701
16702   def CheckPrereq(self):
16703     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16704
16705     assert self.group_uuid in owned_groups
16706
16707     # Check if locked instances are still correct
16708     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16709     if self.op.conflicts_check:
16710       _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16711
16712     self.netparams = {
16713       constants.NIC_MODE: self.network_mode,
16714       constants.NIC_LINK: self.network_link,
16715       }
16716     objects.NIC.CheckParameterSyntax(self.netparams)
16717
16718     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16719     #if self.network_mode == constants.NIC_MODE_BRIDGED:
16720     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16721     self.connected = False
16722     if self.network_uuid in self.group.networks:
16723       self.LogWarning("Network '%s' is already mapped to group '%s'" %
16724                       (self.network_name, self.group.name))
16725       self.connected = True
16726
16727     # check only if not already connected
16728     elif self.op.conflicts_check:
16729       pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16730
16731       _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16732                             "connect to", owned_instances)
16733
16734   def Exec(self, feedback_fn):
16735     # Connect the network and update the group only if not already connected
16736     if not self.connected:
16737       self.group.networks[self.network_uuid] = self.netparams
16738       self.cfg.Update(self.group, feedback_fn)
16739
16740
16741 def _NetworkConflictCheck(lu, check_fn, action, instances):
16742   """Checks for network interface conflicts with a network.
16743
16744   @type lu: L{LogicalUnit}
16745   @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16746     returning boolean
16747   @param check_fn: Function checking for conflict
16748   @type action: string
16749   @param action: Part of error message (see code)
16750   @raise errors.OpPrereqError: If conflicting IP addresses are found.
16751
16752   """
16753   conflicts = []
16754
16755   for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16756     instconflicts = [(idx, nic.ip)
16757                      for (idx, nic) in enumerate(instance.nics)
16758                      if check_fn(nic)]
16759
16760     if instconflicts:
16761       conflicts.append((instance.name, instconflicts))
16762
16763   if conflicts:
16764     lu.LogWarning("IP addresses from network '%s', which is about to %s"
16765                   " node group '%s', are in use: %s" %
16766                   (lu.network_name, action, lu.group.name,
16767                    utils.CommaJoin(("%s: %s" %
16768                                     (name, _FmtNetworkConflict(details)))
16769                                    for (name, details) in conflicts)))
16770
16771     raise errors.OpPrereqError("Conflicting IP addresses found; "
16772                                " remove/modify the corresponding network"
16773                                " interfaces", errors.ECODE_STATE)
16774
16775
16776 def _FmtNetworkConflict(details):
16777   """Utility for L{_NetworkConflictCheck}.
16778
16779   """
16780   return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16781                          for (idx, ipaddr) in details)
16782
16783
16784 class LUNetworkDisconnect(LogicalUnit):
16785   """Disconnect a network to a nodegroup
16786
16787   """
16788   HPATH = "network-disconnect"
16789   HTYPE = constants.HTYPE_NETWORK
16790   REQ_BGL = False
16791
16792   def ExpandNames(self):
16793     self.network_name = self.op.network_name
16794     self.group_name = self.op.group_name
16795
16796     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16797     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16798
16799     self.needed_locks = {
16800       locking.LEVEL_INSTANCE: [],
16801       locking.LEVEL_NODEGROUP: [self.group_uuid],
16802       }
16803     self.share_locks[locking.LEVEL_INSTANCE] = 1
16804
16805   def DeclareLocks(self, level):
16806     if level == locking.LEVEL_INSTANCE:
16807       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16808
16809       # Lock instances optimistically, needs verification once group lock has
16810       # been acquired
16811       self.needed_locks[locking.LEVEL_INSTANCE] = \
16812         self.cfg.GetNodeGroupInstances(self.group_uuid)
16813
16814   def BuildHooksEnv(self):
16815     ret = {
16816       "GROUP_NAME": self.group_name,
16817       }
16818     return ret
16819
16820   def BuildHooksNodes(self):
16821     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16822     return (nodes, nodes)
16823
16824   def CheckPrereq(self):
16825     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16826
16827     assert self.group_uuid in owned_groups
16828
16829     # Check if locked instances are still correct
16830     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16831     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16832
16833     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16834     self.connected = True
16835     if self.network_uuid not in self.group.networks:
16836       self.LogWarning("Network '%s' is not mapped to group '%s'",
16837                       self.network_name, self.group.name)
16838       self.connected = False
16839
16840     # We need this check only if network is not already connected
16841     else:
16842       _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16843                             "disconnect from", owned_instances)
16844
16845   def Exec(self, feedback_fn):
16846     # Disconnect the network and update the group only if network is connected
16847     if self.connected:
16848       del self.group.networks[self.network_uuid]
16849       self.cfg.Update(self.group, feedback_fn)
16850
16851
16852 #: Query type implementations
16853 _QUERY_IMPL = {
16854   constants.QR_CLUSTER: _ClusterQuery,
16855   constants.QR_INSTANCE: _InstanceQuery,
16856   constants.QR_NODE: _NodeQuery,
16857   constants.QR_GROUP: _GroupQuery,
16858   constants.QR_NETWORK: _NetworkQuery,
16859   constants.QR_OS: _OsQuery,
16860   constants.QR_EXTSTORAGE: _ExtStorageQuery,
16861   constants.QR_EXPORT: _ExportQuery,
16862   }
16863
16864 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16865
16866
16867 def _GetQueryImplementation(name):
16868   """Returns the implemtnation for a query type.
16869
16870   @param name: Query type, must be one of L{constants.QR_VIA_OP}
16871
16872   """
16873   try:
16874     return _QUERY_IMPL[name]
16875   except KeyError:
16876     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16877                                errors.ECODE_INVAL)
16878
16879
16880 def _CheckForConflictingIp(lu, ip, node):
16881   """In case of conflicting IP address raise error.
16882
16883   @type ip: string
16884   @param ip: IP address
16885   @type node: string
16886   @param node: node name
16887
16888   """
16889   (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16890   if conf_net is not None:
16891     raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16892                                 (ip, conf_net)),
16893                                errors.ECODE_STATE)
16894
16895   return (None, None)