4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
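# Illustrative sketch only (not part of the module's API): what a minimal
# ExpandNames of a hypothetical LU working on a single instance could look
# like. The class name "LUExampleNoop" is invented for documentation purposes.
#
#   class LUExampleNoop(NoHooksLU):
#     def ExpandNames(self):
#       # expands self.op.instance_name and declares the instance-level lock
#       self._ExpandAndLockInstance()
#       # node locks will be computed later, in DeclareLocks
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE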
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. No nodes should be returned as an
323 empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and the
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called from DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
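# Illustrative sketch only, continuing the hypothetical "LUExampleNoop" from
# above: the matching DeclareLocks implementation that turns the empty node
# lock list declared in ExpandNames into the locked instance's actual nodes.
#
#   class LUExampleNoop(NoHooksLU):
#     ...
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes(primary_only=True)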
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLU.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklet.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
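# Illustrative sketch only: a minimal, hypothetical tasklet. The name and
# behaviour below are invented; real tasklets in this module follow the same
# shape (CheckPrereq plus Exec, no locking of their own).
#
#   class _ExampleNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # nothing to verify
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing nothing")
#
# An LU would then set, in its ExpandNames:
#   self.tasklets = [_ExampleNoopTasklet(self)]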
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper around the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
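# Worked example (made-up parameter names and values) of the semantics
# implemented above:
#
#   old = {"vcpus": 2, "memory": 512}
#   upd = {"memory": constants.VALUE_DEFAULT, "auto_balance": True}
#   _GetUpdatedParams(old, upd)
#   -> {"vcpus": 2, "auto_balance": True}
#   # "memory" is removed (reset to its default), "auto_balance" is added,
#   # untouched keys are kept; the input dicts are never modified.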
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of an instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @return: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with that of the object.
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with that of the object.
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
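# Usage sketch (hypothetical attribute names): after narrowing down the nodes
# an LU really needs, surplus node locks can be dropped early, e.g.:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.target_node, instance.primary_node])
#
# i.e. keep only the two interesting node locks and release everything else
# owned at that level.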
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckGlobalHvParams(params):
1024 """Validates that given hypervisor params are not global ones.
1026 This will ensure that instances don't get customised versions of
1030 used_globals = constants.HVC_GLOBALS.intersection(params)
1032 msg = ("The following hypervisor parameters are global and cannot"
1033 " be customized at instance level, please modify them at"
1034 " cluster level: %s" % utils.CommaJoin(used_globals))
1035 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1038 def _CheckNodeOnline(lu, node, msg=None):
1039 """Ensure that a given node is online.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @param msg: if passed, should be a message to replace the default one
1044 @raise errors.OpPrereqError: if the node is offline
1048 msg = "Can't use offline node"
1049 if lu.cfg.GetNodeInfo(node).offline:
1050 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1053 def _CheckNodeNotDrained(lu, node):
1054 """Ensure that a given node is not drained.
1056 @param lu: the LU on behalf of which we make the check
1057 @param node: the node to check
1058 @raise errors.OpPrereqError: if the node is drained
1061 if lu.cfg.GetNodeInfo(node).drained:
1062 raise errors.OpPrereqError("Can't use drained node %s" % node,
1066 def _CheckNodeVmCapable(lu, node):
1067 """Ensure that a given node is vm capable.
1069 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @raise errors.OpPrereqError: if the node is not vm capable
1074 if not lu.cfg.GetNodeInfo(node).vm_capable:
1075 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1079 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1080 """Ensure that a node supports a given OS.
1082 @param lu: the LU on behalf of which we make the check
1083 @param node: the node to check
1084 @param os_name: the OS to query about
1085 @param force_variant: whether to ignore variant errors
1086 @raise errors.OpPrereqError: if the node is not supporting the OS
1089 result = lu.rpc.call_os_get(node, os_name)
1090 result.Raise("OS '%s' not in supported OS list for node %s" %
1092 prereq=True, ecode=errors.ECODE_INVAL)
1093 if not force_variant:
1094 _CheckOSVariant(result.payload, os_name)
1097 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1098 """Ensure that a node has the given secondary ip.
1100 @type lu: L{LogicalUnit}
1101 @param lu: the LU on behalf of which we make the check
1103 @param node: the node to check
1104 @type secondary_ip: string
1105 @param secondary_ip: the ip to check
1106 @type prereq: boolean
1107 @param prereq: whether to throw a prerequisite or an execute error
1108 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1109 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1112 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1113 result.Raise("Failure checking secondary ip on node %s" % node,
1114 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115 if not result.payload:
1116 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1117 " please fix and re-run this command" % secondary_ip)
1119 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1121 raise errors.OpExecError(msg)
1124 def _CheckNodePVs(nresult, exclusive_storage):
1128 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1129 if pvlist_dict is None:
1130 return (["Can't get PV list from node"], None)
1131 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1133 # check that ':' is not present in PV names, since it's a
1134 # special character for lvcreate (denotes the range of PEs to
1138 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1139 (pv.name, pv.vg_name))
1141 if exclusive_storage:
1142 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1143 errlist.extend(errmsgs)
1144 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1146 for (pvname, lvlist) in shared_pvs:
1147 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1148 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1149 (pvname, utils.CommaJoin(lvlist)))
1150 return (errlist, es_pvinfo)
1153 def _GetClusterDomainSecret():
1154 """Reads the cluster domain secret.
1157 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1161 def _CheckInstanceState(lu, instance, req_states, msg=None):
1162 """Ensure that an instance is in one of the required states.
1164 @param lu: the LU on behalf of which we make the check
1165 @param instance: the instance to check
1166 @param msg: if passed, should be a message to replace the default one
1167 @raise errors.OpPrereqError: if the instance is not in the required state
1171 msg = ("can't use instance from outside %s states" %
1172 utils.CommaJoin(req_states))
1173 if instance.admin_state not in req_states:
1174 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1175 (instance.name, instance.admin_state, msg),
1178 if constants.ADMINST_UP not in req_states:
1179 pnode = instance.primary_node
1180 if not lu.cfg.GetNodeInfo(pnode).offline:
1181 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1182 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1183 prereq=True, ecode=errors.ECODE_ENVIRON)
1184 if instance.name in ins_l.payload:
1185 raise errors.OpPrereqError("Instance %s is running, %s" %
1186 (instance.name, msg), errors.ECODE_STATE)
1188 lu.LogWarning("Primary node offline, ignoring check that instance"
1192 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1193 """Computes if value is in the desired range.
1195 @param name: name of the parameter for which we perform the check
1196 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1198 @param ipolicy: dictionary containing min, max and std values
1199 @param value: actual value that we want to use
1200 @return: None or element not meeting the criteria
1204 if value in [None, constants.VALUE_AUTO]:
1206 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1207 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1208 if value > max_v or min_v > value:
1210 fqn = "%s/%s" % (name, qualifier)
1213 return ("%s value %s is not in range [%s, %s]" %
1214 (fqn, value, min_v, max_v))
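# Worked example (made-up ipolicy fragment):
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   -> an error string of the form "... value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
#   -> None (the value is within the allowed range)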
1218 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1219 nic_count, disk_sizes, spindle_use,
1220 _compute_fn=_ComputeMinMaxSpec):
1221 """Verifies ipolicy against provided specs.
1224 @param ipolicy: The ipolicy
1226 @param mem_size: The memory size
1227 @type cpu_count: int
1228 @param cpu_count: Used cpu cores
1229 @type disk_count: int
1230 @param disk_count: Number of disks used
1231 @type nic_count: int
1232 @param nic_count: Number of nics used
1233 @type disk_sizes: list of ints
1234 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1235 @type spindle_use: int
1236 @param spindle_use: The number of spindles this instance uses
1237 @param _compute_fn: The compute function (unittest only)
1238 @return: A list of violations, or an empty list if no violations are found
1241 assert disk_count == len(disk_sizes)
1244 (constants.ISPEC_MEM_SIZE, "", mem_size),
1245 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1246 (constants.ISPEC_DISK_COUNT, "", disk_count),
1247 (constants.ISPEC_NIC_COUNT, "", nic_count),
1248 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1249 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1250 for idx, d in enumerate(disk_sizes)]
1253 (_compute_fn(name, qualifier, ipolicy, value)
1254 for (name, qualifier, value) in test_settings))
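# Call sketch (made-up values): checking a 1-vCPU, 512 MB instance with two
# disks against an ipolicy; the result is a (possibly empty) list of
# human-readable violation messages produced by _ComputeMinMaxSpec above.
#
#   _ComputeIPolicySpecViolation(ipolicy, 512, 1, 2, 1, [1024, 2048], 1)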
1257 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1258 _compute_fn=_ComputeIPolicySpecViolation):
1259 """Compute if instance meets the specs of ipolicy.
1262 @param ipolicy: The ipolicy to verify against
1263 @type instance: L{objects.Instance}
1264 @param instance: The instance to verify
1265 @param _compute_fn: The function to verify ipolicy (unittest only)
1266 @see: L{_ComputeIPolicySpecViolation}
1269 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1270 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1271 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1272 disk_count = len(instance.disks)
1273 disk_sizes = [disk.size for disk in instance.disks]
1274 nic_count = len(instance.nics)
1276 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1277 disk_sizes, spindle_use)
1280 def _ComputeIPolicyInstanceSpecViolation(
1281 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1282 """Compute if instance specs meet the specs of ipolicy.
1285 @param ipolicy: The ipolicy to verify against
1286 @type instance_spec: dict
1287 @param instance_spec: The instance spec to verify
1288 @param _compute_fn: The function to verify ipolicy (unittest only)
1289 @see: L{_ComputeIPolicySpecViolation}
1292 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1293 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1294 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1295 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1296 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1297 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1299 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1300 disk_sizes, spindle_use)
1303 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1305 _compute_fn=_ComputeIPolicyInstanceViolation):
1306 """Compute if instance meets the specs of the new target group.
1308 @param ipolicy: The ipolicy to verify
1309 @param instance: The instance object to verify
1310 @param current_group: The current group of the instance
1311 @param target_group: The new group of the instance
1312 @param _compute_fn: The function to verify ipolicy (unittest only)
1313 @see: L{_ComputeIPolicySpecViolation}
1316 if current_group == target_group:
1319 return _compute_fn(ipolicy, instance)
1322 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1323 _compute_fn=_ComputeIPolicyNodeViolation):
1324 """Checks that the target node is correct in terms of instance policy.
1326 @param ipolicy: The ipolicy to verify
1327 @param instance: The instance object to verify
1328 @param node: The new node to relocate
1329 @param ignore: Ignore violations of the ipolicy
1330 @param _compute_fn: The function to verify ipolicy (unittest only)
1331 @see: L{_ComputeIPolicySpecViolation}
1334 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1335 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1338 msg = ("Instance does not meet target node group's (%s) instance"
1339 " policy: %s") % (node.group, utils.CommaJoin(res))
1343 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1346 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1347 """Computes a set of any instances that would violate the new ipolicy.
1349 @param old_ipolicy: The current (still in-place) ipolicy
1350 @param new_ipolicy: The new (to become) ipolicy
1351 @param instances: List of instances to verify
1352 @return: A list of instances which violate the new ipolicy but
1356 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1357 _ComputeViolatingInstances(old_ipolicy, instances))
1360 def _ExpandItemName(fn, name, kind):
1361 """Expand an item name.
1363 @param fn: the function to use for expansion
1364 @param name: requested item name
1365 @param kind: text description ('Node' or 'Instance')
1366 @return: the resolved (full) name
1367 @raise errors.OpPrereqError: if the item is not found
1370 full_name = fn(name)
1371 if full_name is None:
1372 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1377 def _ExpandNodeName(cfg, name):
1378 """Wrapper over L{_ExpandItemName} for nodes."""
1379 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1382 def _ExpandInstanceName(cfg, name):
1383 """Wrapper over L{_ExpandItemName} for instance."""
1384 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
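# Usage sketch: both wrappers turn a possibly shortened name into the full
# configuration name, or raise OpPrereqError if the item is unknown, e.g.
# (hypothetical cluster names):
#
#   _ExpandNodeName(self.cfg, "node1")      -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "unknown") -> raises errors.OpPrereqError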
1387 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1388 network_type, mac_prefix, tags):
1389 """Builds network related env variables for hooks
1391 This builds the hook environment from individual variables.
1394 @param name: the name of the network
1395 @type subnet: string
1396 @param subnet: the ipv4 subnet
1397 @type gateway: string
1398 @param gateway: the ipv4 gateway
1399 @type network6: string
1400 @param network6: the ipv6 subnet
1401 @type gateway6: string
1402 @param gateway6: the ipv6 gateway
1403 @type network_type: string
1404 @param network_type: the type of the network
1405 @type mac_prefix: string
1406 @param mac_prefix: the mac_prefix
1408 @param tags: the tags of the network
1413 env["NETWORK_NAME"] = name
1415 env["NETWORK_SUBNET"] = subnet
1417 env["NETWORK_GATEWAY"] = gateway
1419 env["NETWORK_SUBNET6"] = network6
1421 env["NETWORK_GATEWAY6"] = gateway6
1423 env["NETWORK_MAC_PREFIX"] = mac_prefix
1425 env["NETWORK_TYPE"] = network_type
1427 env["NETWORK_TAGS"] = " ".join(tags)
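# Example of the resulting environment (made-up values, only set keys shown;
# unset parameters simply produce no variable):
#
#   _BuildNetworkHookEnv("net1", "10.0.0.0/24", "10.0.0.1", None, None,
#                        None, "aa:00:00", ["tag1"])
#   -> {"NETWORK_NAME": "net1", "NETWORK_SUBNET": "10.0.0.0/24",
#       "NETWORK_GATEWAY": "10.0.0.1", "NETWORK_MAC_PREFIX": "aa:00:00",
#       "NETWORK_TAGS": "tag1"}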
1432 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1433 minmem, maxmem, vcpus, nics, disk_template, disks,
1434 bep, hvp, hypervisor_name, tags):
1435 """Builds instance related env variables for hooks
1437 This builds the hook environment from individual variables.
1440 @param name: the name of the instance
1441 @type primary_node: string
1442 @param primary_node: the name of the instance's primary node
1443 @type secondary_nodes: list
1444 @param secondary_nodes: list of secondary nodes as strings
1445 @type os_type: string
1446 @param os_type: the name of the instance's OS
1447 @type status: string
1448 @param status: the desired status of the instance
1449 @type minmem: string
1450 @param minmem: the minimum memory size of the instance
1451 @type maxmem: string
1452 @param maxmem: the maximum memory size of the instance
1454 @param vcpus: the count of VCPUs the instance has
1456 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1457 the NICs the instance has
1458 @type disk_template: string
1459 @param disk_template: the disk template of the instance
1461 @param disks: the list of (size, mode) pairs
1463 @param bep: the backend parameters for the instance
1465 @param hvp: the hypervisor parameters for the instance
1466 @type hypervisor_name: string
1467 @param hypervisor_name: the hypervisor for the instance
1469 @param tags: list of instance tags as strings
1471 @return: the hook environment for this instance
1476 "INSTANCE_NAME": name,
1477 "INSTANCE_PRIMARY": primary_node,
1478 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1479 "INSTANCE_OS_TYPE": os_type,
1480 "INSTANCE_STATUS": status,
1481 "INSTANCE_MINMEM": minmem,
1482 "INSTANCE_MAXMEM": maxmem,
1483 # TODO(2.7) remove deprecated "memory" value
1484 "INSTANCE_MEMORY": maxmem,
1485 "INSTANCE_VCPUS": vcpus,
1486 "INSTANCE_DISK_TEMPLATE": disk_template,
1487 "INSTANCE_HYPERVISOR": hypervisor_name,
1490 nic_count = len(nics)
1491 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1494 env["INSTANCE_NIC%d_IP" % idx] = ip
1495 env["INSTANCE_NIC%d_MAC" % idx] = mac
1496 env["INSTANCE_NIC%d_MODE" % idx] = mode
1497 env["INSTANCE_NIC%d_LINK" % idx] = link
1499 nobj = objects.Network.FromDict(netinfo)
1500 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1502 # FIXME: broken network reference: the instance NIC specifies a
1503 # network, but the relevant network entry was not in the config. This
1504 # should be made impossible.
1505 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1506 if mode == constants.NIC_MODE_BRIDGED:
1507 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1511 env["INSTANCE_NIC_COUNT"] = nic_count
1514 disk_count = len(disks)
1515 for idx, (size, mode) in enumerate(disks):
1516 env["INSTANCE_DISK%d_SIZE" % idx] = size
1517 env["INSTANCE_DISK%d_MODE" % idx] = mode
1521 env["INSTANCE_DISK_COUNT"] = disk_count
1526 env["INSTANCE_TAGS"] = " ".join(tags)
1528 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1529 for key, value in source.items():
1530 env["INSTANCE_%s_%s" % (kind, key)] = value
1535 def _NICToTuple(lu, nic):
1536 """Build a tuple of nic information.
1538 @type lu: L{LogicalUnit}
1539 @param lu: the logical unit on whose behalf we execute
1540 @type nic: L{objects.NIC}
1541 @param nic: nic to convert to hooks tuple
1546 cluster = lu.cfg.GetClusterInfo()
1547 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1548 mode = filled_params[constants.NIC_MODE]
1549 link = filled_params[constants.NIC_LINK]
1553 net_uuid = lu.cfg.LookupNetwork(net)
1555 nobj = lu.cfg.GetNetwork(net_uuid)
1556 netinfo = objects.Network.ToDict(nobj)
1557 return (ip, mac, mode, link, net, netinfo)
1560 def _NICListToTuple(lu, nics):
1561 """Build a list of nic information tuples.
1563 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1564 value in LUInstanceQueryData.
1566 @type lu: L{LogicalUnit}
1567 @param lu: the logical unit on whose behalf we execute
1568 @type nics: list of L{objects.NIC}
1569 @param nics: list of nics to convert to hooks tuples
1574 hooks_nics.append(_NICToTuple(lu, nic))
1578 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1579 """Builds instance related env variables for hooks from an object.
1581 @type lu: L{LogicalUnit}
1582 @param lu: the logical unit on whose behalf we execute
1583 @type instance: L{objects.Instance}
1584 @param instance: the instance for which we should build the
1586 @type override: dict
1587 @param override: dictionary with key/values that will override
1590 @return: the hook environment dictionary
1593 cluster = lu.cfg.GetClusterInfo()
1594 bep = cluster.FillBE(instance)
1595 hvp = cluster.FillHV(instance)
1597 "name": instance.name,
1598 "primary_node": instance.primary_node,
1599 "secondary_nodes": instance.secondary_nodes,
1600 "os_type": instance.os,
1601 "status": instance.admin_state,
1602 "maxmem": bep[constants.BE_MAXMEM],
1603 "minmem": bep[constants.BE_MINMEM],
1604 "vcpus": bep[constants.BE_VCPUS],
1605 "nics": _NICListToTuple(lu, instance.nics),
1606 "disk_template": instance.disk_template,
1607 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1610 "hypervisor_name": instance.hypervisor,
1611 "tags": instance.tags,
1614 args.update(override)
1615 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1618 def _AdjustCandidatePool(lu, exceptions):
1619 """Adjust the candidate pool after node operations.
1622 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1624 lu.LogInfo("Promoted nodes to master candidate role: %s",
1625 utils.CommaJoin(node.name for node in mod_list))
1626 for name in mod_list:
1627 lu.context.ReaddNode(name)
1628 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1630 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1634 def _DecideSelfPromotion(lu, exceptions=None):
1635 """Decide whether I should promote myself as a master candidate.
1638 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1639 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1640 # the new node will increase mc_max with one, so:
1641 mc_should = min(mc_should + 1, cp_size)
1642 return mc_now < mc_should
1645 def _ComputeViolatingInstances(ipolicy, instances):
1646 """Computes the set of instances that violate the given ipolicy.
1648 @param ipolicy: The ipolicy to verify
1649 @type instances: list of L{objects.Instance}
1650 @param instances: List of instances to verify
1651 @return: A frozenset of instance names violating the ipolicy
1654 return frozenset([inst.name for inst in instances
1655 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1658 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1659 """Check that the bridges needed by a list of nics exist.
1662 cluster = lu.cfg.GetClusterInfo()
1663 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1664 brlist = [params[constants.NIC_LINK] for params in paramslist
1665 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1667 result = lu.rpc.call_bridges_exist(target_node, brlist)
1668 result.Raise("Error checking bridges on destination node '%s'" %
1669 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1672 def _CheckInstanceBridgesExist(lu, instance, node=None):
1673 """Check that the bridges needed by an instance exist.
1677 node = instance.primary_node
1678 _CheckNicsBridgesExist(lu, instance.nics, node)
1681 def _CheckOSVariant(os_obj, name):
1682 """Check whether an OS name conforms to the os variants specification.
1684 @type os_obj: L{objects.OS}
1685 @param os_obj: OS object to check
1687 @param name: OS name passed by the user, to check for validity
1690 variant = objects.OS.GetVariant(name)
1691 if not os_obj.supported_variants:
1693 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1694 " passed)" % (os_obj.name, variant),
1698 raise errors.OpPrereqError("OS name must include a variant",
1701 if variant not in os_obj.supported_variants:
1702 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1705 def _GetNodeInstancesInner(cfg, fn):
1706 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1709 def _GetNodeInstances(cfg, node_name):
1710 """Returns a list of all primary and secondary instances on a node.
1714 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1717 def _GetNodePrimaryInstances(cfg, node_name):
1718 """Returns primary instances on a node.
1721 return _GetNodeInstancesInner(cfg,
1722 lambda inst: node_name == inst.primary_node)
1725 def _GetNodeSecondaryInstances(cfg, node_name):
1726 """Returns secondary instances on a node.
1729 return _GetNodeInstancesInner(cfg,
1730 lambda inst: node_name in inst.secondary_nodes)
1733 def _GetStorageTypeArgs(cfg, storage_type):
1734 """Returns the arguments for a storage type.
1737 # Special case for file storage
1738 if storage_type == constants.ST_FILE:
1739 # storage.FileStorage wants a list of storage directories
1740 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1745 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1748 for dev in instance.disks:
1749 cfg.SetDiskID(dev, node_name)
1751 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1753 result.Raise("Failed to get disk status from node %s" % node_name,
1754 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1756 for idx, bdev_status in enumerate(result.payload):
1757 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1763 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1764 """Check the sanity of iallocator and node arguments and use the
1765 cluster-wide iallocator if appropriate.
1767 Check that at most one of (iallocator, node) is specified. If none is
1768 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1769 then the LU's opcode's iallocator slot is filled with the cluster-wide
1772 @type iallocator_slot: string
1773 @param iallocator_slot: the name of the opcode iallocator slot
1774 @type node_slot: string
1775 @param node_slot: the name of the opcode target node slot
1778 node = getattr(lu.op, node_slot, None)
1779 ialloc = getattr(lu.op, iallocator_slot, None)
1783 if node is not None and ialloc is not None:
1784 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1786 elif ((node is None and ialloc is None) or
1787 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1788 default_iallocator = lu.cfg.GetDefaultIAllocator()
1789 if default_iallocator:
1790 setattr(lu.op, iallocator_slot, default_iallocator)
1792 raise errors.OpPrereqError("No iallocator or node given and no"
1793 " cluster-wide default iallocator found;"
1794 " please specify either an iallocator or a"
1795 " node, or set a cluster-wide default"
1796 " iallocator", errors.ECODE_INVAL)
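# Behaviour sketch (the slot names below are hypothetical): for an opcode
# with an "iallocator" and a "pnode" field, a typical call is
#
#   _CheckIAllocatorOrNode(self, "iallocator", "pnode")
#
# which fails if both are given, and fills self.op.iallocator with the
# cluster-wide default when neither is specified.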
1799 def _GetDefaultIAllocator(cfg, ialloc):
1800 """Decides on which iallocator to use.
1802 @type cfg: L{config.ConfigWriter}
1803 @param cfg: Cluster configuration object
1804 @type ialloc: string or None
1805 @param ialloc: Iallocator specified in opcode
1807 @return: Iallocator name
1811 # Use default iallocator
1812 ialloc = cfg.GetDefaultIAllocator()
1815 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1816 " opcode nor as a cluster-wide default",
1822 def _CheckHostnameSane(lu, name):
1823 """Ensures that a given hostname resolves to a 'sane' name.
1825 The given name is required to be a prefix of the resolved hostname,
1826 to prevent accidental mismatches.
1828 @param lu: the logical unit on behalf of which we're checking
1829 @param name: the name we should resolve and check
1830 @return: the resolved hostname object
1833 hostname = netutils.GetHostname(name=name)
1834 if hostname.name != name:
1835 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1836 if not utils.MatchNameComponent(name, [hostname.name]):
1837 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1838 " same as given hostname '%s'") %
1839 (hostname.name, name), errors.ECODE_INVAL)
1843 class LUClusterPostInit(LogicalUnit):
1844 """Logical unit for running hooks after cluster initialization.
1847 HPATH = "cluster-init"
1848 HTYPE = constants.HTYPE_CLUSTER
1850 def BuildHooksEnv(self):
1855 "OP_TARGET": self.cfg.GetClusterName(),
1858 def BuildHooksNodes(self):
1859 """Build hooks nodes.
1862 return ([], [self.cfg.GetMasterNode()])
1864 def Exec(self, feedback_fn):
1871 class LUClusterDestroy(LogicalUnit):
1872 """Logical unit for destroying the cluster.
1875 HPATH = "cluster-destroy"
1876 HTYPE = constants.HTYPE_CLUSTER
1878 def BuildHooksEnv(self):
1883 "OP_TARGET": self.cfg.GetClusterName(),
1886 def BuildHooksNodes(self):
1887 """Build hooks nodes.
1892 def CheckPrereq(self):
1893 """Check prerequisites.
1895 This checks whether the cluster is empty.
1897 Any errors are signaled by raising errors.OpPrereqError.
1900 master = self.cfg.GetMasterNode()
1902 nodelist = self.cfg.GetNodeList()
1903 if len(nodelist) != 1 or nodelist[0] != master:
1904 raise errors.OpPrereqError("There are still %d node(s) in"
1905 " this cluster." % (len(nodelist) - 1),
1907 instancelist = self.cfg.GetInstanceList()
1909 raise errors.OpPrereqError("There are still %d instance(s) in"
1910 " this cluster." % len(instancelist),
1913 def Exec(self, feedback_fn):
1914 """Destroys the cluster.
1917 master_params = self.cfg.GetMasterNetworkParameters()
1919 # Run post hooks on master node before it's removed
1920 _RunPostHook(self, master_params.name)
1922 ems = self.cfg.GetUseExternalMipScript()
1923 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1926 self.LogWarning("Error disabling the master IP address: %s",
1929 return master_params.name
1932 def _VerifyCertificate(filename):
1933 """Verifies a certificate for L{LUClusterVerifyConfig}.
1935 @type filename: string
1936 @param filename: Path to PEM file
1940 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1941 utils.ReadFile(filename))
1942 except Exception, err: # pylint: disable=W0703
1943 return (LUClusterVerifyConfig.ETYPE_ERROR,
1944 "Failed to load X509 certificate %s: %s" % (filename, err))
1947 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1948 constants.SSL_CERT_EXPIRATION_ERROR)
1951 fnamemsg = "While verifying %s: %s" % (filename, msg)
1956 return (None, fnamemsg)
1957 elif errcode == utils.CERT_WARNING:
1958 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1959 elif errcode == utils.CERT_ERROR:
1960 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1962 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
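# Illustrative sketch (editorial addition): callers treat the return value of
# _VerifyCertificate as an (error-type, message) pair, with None as the error
# type for a healthy certificate, mirroring its use in LUClusterVerifyConfig:
#   for cert_filename in pathutils.ALL_CERT_FILES:
#     (errcode, msg) = _VerifyCertificate(cert_filename)
#     # errcode is None, ETYPE_WARNING or ETYPE_ERROR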
1965 def _GetAllHypervisorParameters(cluster, instances):
1966 """Compute the set of all hypervisor parameters.
1968 @type cluster: L{objects.Cluster}
1969 @param cluster: the cluster object
1970 @type instances: list of L{objects.Instance}
1971 @param instances: additional instances from which to obtain parameters
1972 @rtype: list of (origin, hypervisor, parameters)
1973 @return: a list with all parameters found, indicating the hypervisor they
1974 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1979 for hv_name in cluster.enabled_hypervisors:
1980 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1982 for os_name, os_hvp in cluster.os_hvp.items():
1983 for hv_name, hv_params in os_hvp.items():
1985 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1986 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1988 # TODO: collapse identical parameter values in a single one
1989 for instance in instances:
1990 if instance.hvparams:
1991 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1992 cluster.FillHV(instance)))
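# Illustrative sketch (editorial addition): the (origin, hypervisor,
# parameters) tuples returned above are typically consumed like this, which is
# essentially what LUClusterVerifyConfig._VerifyHVP does further down:
#   for (origin, hv_name, hv_params) in _GetAllHypervisorParameters(cluster,
#                                                                   instances):
#     hypervisor.GetHypervisorClass(hv_name).CheckParameterSyntax(hv_params)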
1997 class _VerifyErrors(object):
1998 """Mix-in for cluster/group verify LUs.
2000 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2001 self.op and self._feedback_fn to be available.)
2005 ETYPE_FIELD = "code"
2006 ETYPE_ERROR = "ERROR"
2007 ETYPE_WARNING = "WARNING"
2009 def _Error(self, ecode, item, msg, *args, **kwargs):
2010 """Format an error message.
2012 Based on the opcode's error_codes parameter, either format a
2013 parseable error code, or a simpler error string.
2015 This must be called only from Exec and functions called from Exec.
2018 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2019 itype, etxt, _ = ecode
2020 # If the error code is in the list of ignored errors, demote the error to a warning
2022 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2023 ltype = self.ETYPE_WARNING
2024 # first complete the msg
2027 # then format the whole message
2028 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2029 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2035 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2036 # and finally report it via the feedback_fn
2037 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2038 # do not mark the operation as failed for warnings, only for errors
2039 if ltype == self.ETYPE_ERROR:
2042 def _ErrorIf(self, cond, *args, **kwargs):
2043 """Log an error message if the passed condition is True.
2047 or self.op.debug_simulate_errors): # pylint: disable=E1101
2048 self._Error(*args, **kwargs)
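# Illustrative sketch (editorial addition): a verify LU mixing in _VerifyErrors
# reports problems roughly as
#   self._ErrorIf(test, constants.CV_ENODERPC, node, "no data returned")
# which, when the condition holds, formats the message, demotes it to a
# warning if its code is listed in self.op.ignore_errors, reports it via
# self._feedback_fn and sets self.bad for real errors.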
2051 class LUClusterVerify(NoHooksLU):
2052 """Submits all jobs necessary to verify the cluster.
2057 def ExpandNames(self):
2058 self.needed_locks = {}
2060 def Exec(self, feedback_fn):
2063 if self.op.group_name:
2064 groups = [self.op.group_name]
2065 depends_fn = lambda: None
2067 groups = self.cfg.GetNodeGroupList()
2069 # Verify global configuration
2071 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2074 # Always depend on global verification
2075 depends_fn = lambda: [(-len(jobs), [])]
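# Note (editorial interpretation of the line above, not original commentary):
# the negative job ID in the dependency tuple is a relative reference to a job
# submitted earlier in the same submission, so each per-group verification job
# built below waits for the global configuration-verify job.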
2078 [opcodes.OpClusterVerifyGroup(group_name=group,
2079 ignore_errors=self.op.ignore_errors,
2080 depends=depends_fn())]
2081 for group in groups)
2083 # Fix up all parameters
2084 for op in itertools.chain(*jobs): # pylint: disable=W0142
2085 op.debug_simulate_errors = self.op.debug_simulate_errors
2086 op.verbose = self.op.verbose
2087 op.error_codes = self.op.error_codes
2089 op.skip_checks = self.op.skip_checks
2090 except AttributeError:
2091 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2093 return ResultWithJobs(jobs)
2096 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2097 """Verifies the cluster config.
2102 def _VerifyHVP(self, hvp_data):
2103 """Verifies locally the syntax of the hypervisor parameters.
2106 for item, hv_name, hv_params in hvp_data:
2107 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2110 hv_class = hypervisor.GetHypervisorClass(hv_name)
2111 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2112 hv_class.CheckParameterSyntax(hv_params)
2113 except errors.GenericError, err:
2114 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2116 def ExpandNames(self):
2117 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2118 self.share_locks = _ShareAll()
2120 def CheckPrereq(self):
2121 """Check prerequisites.
2124 # Retrieve all information
2125 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2126 self.all_node_info = self.cfg.GetAllNodesInfo()
2127 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2129 def Exec(self, feedback_fn):
2130 """Verify integrity of cluster, performing various test on nodes.
2134 self._feedback_fn = feedback_fn
2136 feedback_fn("* Verifying cluster config")
2138 for msg in self.cfg.VerifyConfig():
2139 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2141 feedback_fn("* Verifying cluster certificate files")
2143 for cert_filename in pathutils.ALL_CERT_FILES:
2144 (errcode, msg) = _VerifyCertificate(cert_filename)
2145 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2147 feedback_fn("* Verifying hypervisor parameters")
2149 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2150 self.all_inst_info.values()))
2152 feedback_fn("* Verifying all nodes belong to an existing group")
2154 # We do this verification here because, should this bogus circumstance
2155 # occur, it would never be caught by VerifyGroup, which only acts on
2156 # nodes/instances reachable from existing node groups.
2158 dangling_nodes = set(node.name for node in self.all_node_info.values()
2159 if node.group not in self.all_group_info)
2161 dangling_instances = {}
2162 no_node_instances = []
2164 for inst in self.all_inst_info.values():
2165 if inst.primary_node in dangling_nodes:
2166 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2167 elif inst.primary_node not in self.all_node_info:
2168 no_node_instances.append(inst.name)
2173 utils.CommaJoin(dangling_instances.get(node.name,
2175 for node in dangling_nodes]
2177 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2179 "the following nodes (and their instances) belong to a non"
2180 " existing group: %s", utils.CommaJoin(pretty_dangling))
2182 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2184 "the following instances have a non-existing primary-node:"
2185 " %s", utils.CommaJoin(no_node_instances))
2190 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2191 """Verifies the status of a node group.
2194 HPATH = "cluster-verify"
2195 HTYPE = constants.HTYPE_CLUSTER
2198 _HOOKS_INDENT_RE = re.compile("^", re.M)
2200 class NodeImage(object):
2201 """A class representing the logical and physical status of a node.
2204 @ivar name: the node name to which this object refers
2205 @ivar volumes: a structure as returned from
2206 L{ganeti.backend.GetVolumeList} (runtime)
2207 @ivar instances: a list of running instances (runtime)
2208 @ivar pinst: list of configured primary instances (config)
2209 @ivar sinst: list of configured secondary instances (config)
2210 @ivar sbp: dictionary of {primary-node: list of instances} for all
2211 instances for which this node is secondary (config)
2212 @ivar mfree: free memory, as reported by hypervisor (runtime)
2213 @ivar dfree: free disk, as reported by the node (runtime)
2214 @ivar offline: the offline status (config)
2215 @type rpc_fail: boolean
2216 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2217 not whether the individual keys were correct) (runtime)
2218 @type lvm_fail: boolean
2219 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2220 @type hyp_fail: boolean
2221 @ivar hyp_fail: whether the RPC call didn't return the instance list
2222 @type ghost: boolean
2223 @ivar ghost: whether this is a known node or not (config)
2224 @type os_fail: boolean
2225 @ivar os_fail: whether the RPC call didn't return valid OS data
2227 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2228 @type vm_capable: boolean
2229 @ivar vm_capable: whether the node can host instances
2231 @ivar pv_min: size in MiB of the smallest PVs
2233 @ivar pv_max: size in MiB of the biggest PVs
2236 def __init__(self, offline=False, name=None, vm_capable=True):
2245 self.offline = offline
2246 self.vm_capable = vm_capable
2247 self.rpc_fail = False
2248 self.lvm_fail = False
2249 self.hyp_fail = False
2251 self.os_fail = False
2256 def ExpandNames(self):
2257 # This raises errors.OpPrereqError on its own:
2258 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2260 # Get instances in node group; this is unsafe and needs verification later
2262 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2264 self.needed_locks = {
2265 locking.LEVEL_INSTANCE: inst_names,
2266 locking.LEVEL_NODEGROUP: [self.group_uuid],
2267 locking.LEVEL_NODE: [],
2269 # This opcode is run by watcher every five minutes and acquires all nodes
2270 # for a group. It doesn't run for a long time, so it's better to acquire
2271 # the node allocation lock as well.
2272 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2275 self.share_locks = _ShareAll()
2277 def DeclareLocks(self, level):
2278 if level == locking.LEVEL_NODE:
2279 # Get members of node group; this is unsafe and needs verification later
2280 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2282 all_inst_info = self.cfg.GetAllInstancesInfo()
2284 # In Exec(), we warn about mirrored instances that have primary and
2285 # secondary living in separate node groups. To fully verify that
2286 # volumes for these instances are healthy, we will need to do an
2287 # extra call to their secondaries. We ensure here those nodes will be locked.
2289 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2290 # Important: access only the instances whose lock is owned
2291 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2292 nodes.update(all_inst_info[inst].secondary_nodes)
2294 self.needed_locks[locking.LEVEL_NODE] = nodes
2296 def CheckPrereq(self):
2297 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2298 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2300 group_nodes = set(self.group_info.members)
2302 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2305 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2307 unlocked_instances = \
2308 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2311 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2312 utils.CommaJoin(unlocked_nodes),
2315 if unlocked_instances:
2316 raise errors.OpPrereqError("Missing lock for instances: %s" %
2317 utils.CommaJoin(unlocked_instances),
2320 self.all_node_info = self.cfg.GetAllNodesInfo()
2321 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2323 self.my_node_names = utils.NiceSort(group_nodes)
2324 self.my_inst_names = utils.NiceSort(group_instances)
2326 self.my_node_info = dict((name, self.all_node_info[name])
2327 for name in self.my_node_names)
2329 self.my_inst_info = dict((name, self.all_inst_info[name])
2330 for name in self.my_inst_names)
2332 # We detect here the nodes that will need the extra RPC calls for verifying
2333 # split LV volumes; they should be locked.
2334 extra_lv_nodes = set()
2336 for inst in self.my_inst_info.values():
2337 if inst.disk_template in constants.DTS_INT_MIRROR:
2338 for nname in inst.all_nodes:
2339 if self.all_node_info[nname].group != self.group_uuid:
2340 extra_lv_nodes.add(nname)
2342 unlocked_lv_nodes = \
2343 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2345 if unlocked_lv_nodes:
2346 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2347 utils.CommaJoin(unlocked_lv_nodes),
2349 self.extra_lv_nodes = list(extra_lv_nodes)
2351 def _VerifyNode(self, ninfo, nresult):
2352 """Perform some basic validation on data returned from a node.
2354 - check the result data structure is well formed and has all the mandatory fields
2356 - check ganeti version
2358 @type ninfo: L{objects.Node}
2359 @param ninfo: the node to check
2360 @param nresult: the results from the node
2362 @return: whether overall this call was successful (and we can expect
2363 reasonable values in the response)
2367 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2369 # main result, nresult should be a non-empty dict
2370 test = not nresult or not isinstance(nresult, dict)
2371 _ErrorIf(test, constants.CV_ENODERPC, node,
2372 "unable to verify node: no data returned")
2376 # compares ganeti version
2377 local_version = constants.PROTOCOL_VERSION
2378 remote_version = nresult.get("version", None)
2379 test = not (remote_version and
2380 isinstance(remote_version, (list, tuple)) and
2381 len(remote_version) == 2)
2382 _ErrorIf(test, constants.CV_ENODERPC, node,
2383 "connection to node returned invalid data")
2387 test = local_version != remote_version[0]
2388 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2389 "incompatible protocol versions: master %s,"
2390 " node %s", local_version, remote_version[0])
2394 # node seems compatible, we can actually try to look into its results
2396 # full package version
2397 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2398 constants.CV_ENODEVERSION, node,
2399 "software version mismatch: master %s, node %s",
2400 constants.RELEASE_VERSION, remote_version[1],
2401 code=self.ETYPE_WARNING)
2403 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2404 if ninfo.vm_capable and isinstance(hyp_result, dict):
2405 for hv_name, hv_result in hyp_result.iteritems():
2406 test = hv_result is not None
2407 _ErrorIf(test, constants.CV_ENODEHV, node,
2408 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2410 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2411 if ninfo.vm_capable and isinstance(hvp_result, list):
2412 for item, hv_name, hv_result in hvp_result:
2413 _ErrorIf(True, constants.CV_ENODEHV, node,
2414 "hypervisor %s parameter verify failure (source %s): %s",
2415 hv_name, item, hv_result)
2417 test = nresult.get(constants.NV_NODESETUP,
2418 ["Missing NODESETUP results"])
2419 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2424 def _VerifyNodeTime(self, ninfo, nresult,
2425 nvinfo_starttime, nvinfo_endtime):
2426 """Check the node time.
2428 @type ninfo: L{objects.Node}
2429 @param ninfo: the node to check
2430 @param nresult: the remote results for the node
2431 @param nvinfo_starttime: the start time of the RPC call
2432 @param nvinfo_endtime: the end time of the RPC call
2436 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2438 ntime = nresult.get(constants.NV_TIME, None)
2440 ntime_merged = utils.MergeTime(ntime)
2441 except (ValueError, TypeError):
2442 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2445 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2446 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2447 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2448 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2452 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2453 "Node time diverges by at least %s from master node time",
2456 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2457 """Check the node LVM results and update info for cross-node checks.
2459 @type ninfo: L{objects.Node}
2460 @param ninfo: the node to check
2461 @param nresult: the remote results for the node
2462 @param vg_name: the configured VG name
2463 @type nimg: L{NodeImage}
2464 @param nimg: node image
2471 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2473 # checks vg existence and size > 20G
2474 vglist = nresult.get(constants.NV_VGLIST, None)
2476 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2478 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2479 constants.MIN_VG_SIZE)
2480 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2483 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2485 self._Error(constants.CV_ENODELVM, node, em)
2486 if pvminmax is not None:
2487 (nimg.pv_min, nimg.pv_max) = pvminmax
2489 def _VerifyGroupLVM(self, node_image, vg_name):
2490 """Check cross-node consistency in LVM.
2492 @type node_image: dict
2493 @param node_image: info about nodes, mapping from node to names to
2494 L{NodeImage} objects
2495 @param vg_name: the configured VG name
2501 # Only exclusive storage needs this kind of check
2502 if not self._exclusive_storage:
2505 # exclusive_storage wants all PVs to have the same size (approximately);
2506 # if the smallest and the biggest ones are okay, everything is fine.
2507 # pv_min is None iff pv_max is None
2508 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2511 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2512 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2513 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2514 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2515 "PV sizes differ too much in the group; smallest (%s MB) is"
2516 " on %s, biggest (%s MB) is on %s",
2517 pvmin, minnode, pvmax, maxnode)
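# Worked example (editorial addition): with exclusive storage enabled, if node
# A reports pv_min=10240/pv_max=10240 MB and node B reports pv_min=10240/
# pv_max=20480 MB, then pvmin is 10240 (on A) and pvmax is 20480 (on B); if
# utils.LvmExclusiveTestBadPvSizes considers that spread too large, a
# CV_EGROUPDIFFERENTPVSIZE error is reported for the whole group.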
2519 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2520 """Check the node bridges.
2522 @type ninfo: L{objects.Node}
2523 @param ninfo: the node to check
2524 @param nresult: the remote results for the node
2525 @param bridges: the expected list of bridges
2532 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2534 missing = nresult.get(constants.NV_BRIDGES, None)
2535 test = not isinstance(missing, list)
2536 _ErrorIf(test, constants.CV_ENODENET, node,
2537 "did not return valid bridge information")
2539 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2540 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2542 def _VerifyNodeUserScripts(self, ninfo, nresult):
2543 """Check the results of user scripts presence and executability on the node
2545 @type ninfo: L{objects.Node}
2546 @param ninfo: the node to check
2547 @param nresult: the remote results for the node
2552 test = constants.NV_USERSCRIPTS not in nresult
2553 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2554 "did not return user scripts information")
2556 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2558 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2559 "user scripts not present or not executable: %s" %
2560 utils.CommaJoin(sorted(broken_scripts)))
2562 def _VerifyNodeNetwork(self, ninfo, nresult):
2563 """Check the node network connectivity results.
2565 @type ninfo: L{objects.Node}
2566 @param ninfo: the node to check
2567 @param nresult: the remote results for the node
2571 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2573 test = constants.NV_NODELIST not in nresult
2574 _ErrorIf(test, constants.CV_ENODESSH, node,
2575 "node hasn't returned node ssh connectivity data")
2577 if nresult[constants.NV_NODELIST]:
2578 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2579 _ErrorIf(True, constants.CV_ENODESSH, node,
2580 "ssh communication with node '%s': %s", a_node, a_msg)
2582 test = constants.NV_NODENETTEST not in nresult
2583 _ErrorIf(test, constants.CV_ENODENET, node,
2584 "node hasn't returned node tcp connectivity data")
2586 if nresult[constants.NV_NODENETTEST]:
2587 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2589 _ErrorIf(True, constants.CV_ENODENET, node,
2590 "tcp communication with node '%s': %s",
2591 anode, nresult[constants.NV_NODENETTEST][anode])
2593 test = constants.NV_MASTERIP not in nresult
2594 _ErrorIf(test, constants.CV_ENODENET, node,
2595 "node hasn't returned node master IP reachability data")
2597 if not nresult[constants.NV_MASTERIP]:
2598 if node == self.master_node:
2599 msg = "the master node cannot reach the master IP (not configured?)"
2601 msg = "cannot reach the master IP"
2602 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2604 def _VerifyInstance(self, instance, inst_config, node_image,
2606 """Verify an instance.
2608 This function checks to see if the required block devices are
2609 available on the instance's node, and that the nodes are in the correct state.
2613 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2614 pnode = inst_config.primary_node
2615 pnode_img = node_image[pnode]
2616 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2618 node_vol_should = {}
2619 inst_config.MapLVsByNode(node_vol_should)
2621 cluster = self.cfg.GetClusterInfo()
2622 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2624 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2625 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2626 code=self.ETYPE_WARNING)
2628 for node in node_vol_should:
2629 n_img = node_image[node]
2630 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2631 # ignore missing volumes on offline or broken nodes
2633 for volume in node_vol_should[node]:
2634 test = volume not in n_img.volumes
2635 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2636 "volume %s missing on node %s", volume, node)
2638 if inst_config.admin_state == constants.ADMINST_UP:
2639 test = instance not in pnode_img.instances and not pnode_img.offline
2640 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2641 "instance not running on its primary node %s",
2643 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2644 "instance is marked as running and lives on offline node %s",
2647 diskdata = [(nname, success, status, idx)
2648 for (nname, disks) in diskstatus.items()
2649 for idx, (success, status) in enumerate(disks)]
2651 for nname, success, bdev_status, idx in diskdata:
2652 # the 'ghost node' construction in Exec() ensures that we have a node_image entry for every node name
2654 snode = node_image[nname]
2655 bad_snode = snode.ghost or snode.offline
2656 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2657 not success and not bad_snode,
2658 constants.CV_EINSTANCEFAULTYDISK, instance,
2659 "couldn't retrieve status for disk/%s on %s: %s",
2660 idx, nname, bdev_status)
2661 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2662 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "disk/%s on %s is faulty", idx, nname)
2666 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2667 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2668 " primary node failed", instance)
2670 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2671 constants.CV_EINSTANCELAYOUT,
2672 instance, "instance has multiple secondary nodes: %s",
2673 utils.CommaJoin(inst_config.secondary_nodes),
2674 code=self.ETYPE_WARNING)
2676 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2677 # Disk template not compatible with exclusive_storage: no instance
2678 # node should have the flag set
2679 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2680 inst_config.all_nodes)
2681 es_nodes = [n for (n, es) in es_flags.items()
2683 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2684 "instance has template %s, which is not supported on nodes"
2685 " that have exclusive storage set: %s",
2686 inst_config.disk_template, utils.CommaJoin(es_nodes))
2688 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2689 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2690 instance_groups = {}
2692 for node in instance_nodes:
2693 instance_groups.setdefault(self.all_node_info[node].group,
2697 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2698 # Sort so that we always list the primary node first.
2699 for group, nodes in sorted(instance_groups.items(),
2700 key=lambda (_, nodes): pnode in nodes,
2703 self._ErrorIf(len(instance_groups) > 1,
2704 constants.CV_EINSTANCESPLITGROUPS,
2705 instance, "instance has primary and secondary nodes in"
2706 " different groups: %s", utils.CommaJoin(pretty_list),
2707 code=self.ETYPE_WARNING)
2709 inst_nodes_offline = []
2710 for snode in inst_config.secondary_nodes:
2711 s_img = node_image[snode]
2712 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2713 snode, "instance %s, connection to secondary node failed",
2717 inst_nodes_offline.append(snode)
2719 # warn that the instance lives on offline nodes
2720 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2721 "instance has offline secondary node(s) %s",
2722 utils.CommaJoin(inst_nodes_offline))
2723 # ... or ghost/non-vm_capable nodes
2724 for node in inst_config.all_nodes:
2725 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2726 instance, "instance lives on ghost node %s", node)
2727 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2728 instance, "instance lives on non-vm_capable node %s", node)
2730 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2731 """Verify if there are any unknown volumes in the cluster.
2733 The .os, .swap and backup volumes are ignored. All other volumes are
2734 reported as unknown.
2736 @type reserved: L{ganeti.utils.FieldSet}
2737 @param reserved: a FieldSet of reserved volume names
2740 for node, n_img in node_image.items():
2741 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2742 self.all_node_info[node].group != self.group_uuid):
2743 # skip non-healthy nodes
2745 for volume in n_img.volumes:
2746 test = ((node not in node_vol_should or
2747 volume not in node_vol_should[node]) and
2748 not reserved.Matches(volume))
2749 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2750 "volume %s is unknown", volume)
2752 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2753 """Verify N+1 Memory Resilience.
2755 Check that if one single node dies we can still start all the
2756 instances it was primary for.
2759 cluster_info = self.cfg.GetClusterInfo()
2760 for node, n_img in node_image.items():
2761 # This code checks that every node which is now listed as
2762 # secondary has enough memory to host all the instances it would have to
2763 # take over, should a single other node in the cluster fail.
2764 # FIXME: not ready for failover to an arbitrary node
2765 # FIXME: does not support file-backed instances
2766 # WARNING: we currently take into account down instances as well
2767 # as up ones, considering that even if they're down someone
2768 # might want to start them even in the event of a node failure.
2769 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2770 # we're skipping nodes marked offline and nodes in other groups from
2771 # the N+1 warning, since most likely we don't have good memory
2772 # information from them; we already list instances living on such
2773 # nodes, and that's enough warning
2775 #TODO(dynmem): also consider ballooning out other instances
2776 for prinode, instances in n_img.sbp.items():
2778 for instance in instances:
2779 bep = cluster_info.FillBE(instance_cfg[instance])
2780 if bep[constants.BE_AUTO_BALANCE]:
2781 needed_mem += bep[constants.BE_MINMEM]
2782 test = n_img.mfree < needed_mem
2783 self._ErrorIf(test, constants.CV_ENODEN1, node,
2784 "not enough memory to accomodate instance failovers"
2785 " should node %s fail (%dMiB needed, %dMiB available)",
2786 prinode, needed_mem, n_img.mfree)
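# Worked example (editorial addition): if this node is secondary for two
# auto-balanced instances whose primary is the same node, with BE_MINMEM of
# 2048 MiB and 4096 MiB, then needed_mem is 6144 MiB and a CV_ENODEN1 error is
# reported whenever the node's reported free memory (mfree) is below that.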
2789 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2790 (files_all, files_opt, files_mc, files_vm)):
2791 """Verifies file checksums collected from all nodes.
2793 @param errorif: Callback for reporting errors
2794 @param nodeinfo: List of L{objects.Node} objects
2795 @param master_node: Name of master node
2796 @param all_nvinfo: RPC results
2799 # Define functions determining which nodes to consider for a file
2802 (files_mc, lambda node: (node.master_candidate or
2803 node.name == master_node)),
2804 (files_vm, lambda node: node.vm_capable),
2807 # Build mapping from filename to list of nodes which should have the file
2809 for (files, fn) in files2nodefn:
2811 filenodes = nodeinfo
2813 filenodes = filter(fn, nodeinfo)
2814 nodefiles.update((filename,
2815 frozenset(map(operator.attrgetter("name"), filenodes)))
2816 for filename in files)
2818 assert set(nodefiles) == (files_all | files_mc | files_vm)
2820 fileinfo = dict((filename, {}) for filename in nodefiles)
2821 ignore_nodes = set()
2823 for node in nodeinfo:
2825 ignore_nodes.add(node.name)
2828 nresult = all_nvinfo[node.name]
2830 if nresult.fail_msg or not nresult.payload:
2833 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2834 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2835 for (key, value) in fingerprints.items())
2838 test = not (node_files and isinstance(node_files, dict))
2839 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2840 "Node did not return file checksum data")
2842 ignore_nodes.add(node.name)
2845 # Build per-checksum mapping from filename to nodes having it
2846 for (filename, checksum) in node_files.items():
2847 assert filename in nodefiles
2848 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2850 for (filename, checksums) in fileinfo.items():
2851 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2853 # Nodes having the file
2854 with_file = frozenset(node_name
2855 for nodes in fileinfo[filename].values()
2856 for node_name in nodes) - ignore_nodes
2858 expected_nodes = nodefiles[filename] - ignore_nodes
2860 # Nodes missing file
2861 missing_file = expected_nodes - with_file
2863 if filename in files_opt:
2865 errorif(missing_file and missing_file != expected_nodes,
2866 constants.CV_ECLUSTERFILECHECK, None,
2867 "File %s is optional, but it must exist on all or no"
2868 " nodes (not found on %s)",
2869 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2871 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is missing from node(s) %s", filename,
2873 utils.CommaJoin(utils.NiceSort(missing_file)))
2875 # Warn if a node has a file it shouldn't
2876 unexpected = with_file - expected_nodes
2878 constants.CV_ECLUSTERFILECHECK, None,
2879 "File %s should not exist on node(s) %s",
2880 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2882 # See if there are multiple versions of the file
2883 test = len(checksums) > 1
2885 variants = ["variant %s on %s" %
2886 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2887 for (idx, (checksum, nodes)) in
2888 enumerate(sorted(checksums.items()))]
2892 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2893 "File %s found with %s different checksums (%s)",
2894 filename, len(checksums), "; ".join(variants))
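# Illustrative sketch (editorial addition, hypothetical values): for each
# tracked file the per-checksum mapping built above looks roughly like
#   fileinfo["/var/lib/ganeti/config.data"] = {
#     "0123abcd...": set(["node1", "node2"]),
#     "89efcdab...": set(["node3"]),
#   }
# and more than one checksum for the same file triggers the
# CV_ECLUSTERFILECHECK "different checksums" error above.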
2896 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2898 """Verifies and the node DRBD status.
2900 @type ninfo: L{objects.Node}
2901 @param ninfo: the node to check
2902 @param nresult: the remote results for the node
2903 @param instanceinfo: the dict of instances
2904 @param drbd_helper: the configured DRBD usermode helper
2905 @param drbd_map: the DRBD map as returned by
2906 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2910 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2913 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2914 test = (helper_result is None)
2915 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2916 "no drbd usermode helper returned")
2918 status, payload = helper_result
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "drbd usermode helper check unsuccessful: %s", payload)
2922 test = status and (payload != drbd_helper)
2923 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2924 "wrong drbd usermode helper: %s", payload)
2926 # compute the DRBD minors
2928 for minor, instance in drbd_map[node].items():
2929 test = instance not in instanceinfo
2930 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2931 "ghost instance '%s' in temporary DRBD map", instance)
2932 # ghost instance should not be running, but otherwise we
2933 # don't give double warnings (both ghost instance and
2934 # unallocated minor in use)
2936 node_drbd[minor] = (instance, False)
2938 instance = instanceinfo[instance]
2939 node_drbd[minor] = (instance.name,
2940 instance.admin_state == constants.ADMINST_UP)
2942 # and now check them
2943 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2944 test = not isinstance(used_minors, (tuple, list))
2945 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2946 "cannot parse drbd status file: %s", str(used_minors))
2948 # we cannot check drbd status
2951 for minor, (iname, must_exist) in node_drbd.items():
2952 test = minor not in used_minors and must_exist
2953 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2954 "drbd minor %d of instance %s is not active", minor, iname)
2955 for minor in used_minors:
2956 test = minor not in node_drbd
2957 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2958 "unallocated drbd minor %d is in use", minor)
2960 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2961 """Builds the node OS structures.
2963 @type ninfo: L{objects.Node}
2964 @param ninfo: the node to check
2965 @param nresult: the remote results for the node
2966 @param nimg: the node image object
2970 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2972 remote_os = nresult.get(constants.NV_OSLIST, None)
2973 test = (not isinstance(remote_os, list) or
2974 not compat.all(isinstance(v, list) and len(v) == 7
2975 for v in remote_os))
2977 _ErrorIf(test, constants.CV_ENODEOS, node,
2978 "node hasn't returned valid OS data")
2987 for (name, os_path, status, diagnose,
2988 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2990 if name not in os_dict:
2993 # parameters is a list of lists instead of list of tuples due to
2994 # JSON lacking a real tuple type, fix it:
2995 parameters = [tuple(v) for v in parameters]
2996 os_dict[name].append((os_path, status, diagnose,
2997 set(variants), set(parameters), set(api_ver)))
2999 nimg.oslist = os_dict
3001 def _VerifyNodeOS(self, ninfo, nimg, base):
3002 """Verifies the node OS list.
3004 @type ninfo: L{objects.Node}
3005 @param ninfo: the node to check
3006 @param nimg: the node image object
3007 @param base: the 'template' node we match against (e.g. from the master)
3011 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3013 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3015 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3016 for os_name, os_data in nimg.oslist.items():
3017 assert os_data, "Empty OS status for OS %s?!" % os_name
3018 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3019 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3020 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3021 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3022 "OS '%s' has multiple entries (first one shadows the rest): %s",
3023 os_name, utils.CommaJoin([v[0] for v in os_data]))
3024 # comparisons with the 'base' image
3025 test = os_name not in base.oslist
3026 _ErrorIf(test, constants.CV_ENODEOS, node,
3027 "Extra OS %s not present on reference node (%s)",
3031 assert base.oslist[os_name], "Base node has empty OS status?"
3032 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3034 # base OS is invalid, skipping
3036 for kind, a, b in [("API version", f_api, b_api),
3037 ("variants list", f_var, b_var),
3038 ("parameters", beautify_params(f_param),
3039 beautify_params(b_param))]:
3040 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3041 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3042 kind, os_name, base.name,
3043 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3045 # check any missing OSes
3046 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3047 _ErrorIf(missing, constants.CV_ENODEOS, node,
3048 "OSes present on reference node %s but missing on this node: %s",
3049 base.name, utils.CommaJoin(missing))
3051 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3052 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3054 @type ninfo: L{objects.Node}
3055 @param ninfo: the node to check
3056 @param nresult: the remote results for the node
3057 @type is_master: bool
3058 @param is_master: Whether node is the master node
3064 (constants.ENABLE_FILE_STORAGE or
3065 constants.ENABLE_SHARED_FILE_STORAGE)):
3067 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3069 # This should never happen
3070 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3071 "Node did not return forbidden file storage paths")
3073 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3074 "Found forbidden file storage paths: %s",
3075 utils.CommaJoin(fspaths))
3077 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3078 constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Node should not have returned forbidden file storage"
3082 def _VerifyOob(self, ninfo, nresult):
3083 """Verifies out of band functionality of a node.
3085 @type ninfo: L{objects.Node}
3086 @param ninfo: the node to check
3087 @param nresult: the remote results for the node
3091 # We just have to verify the paths on master and/or master candidates
3092 # as the oob helper is invoked on the master
3093 if ((ninfo.master_candidate or ninfo.master_capable) and
3094 constants.NV_OOB_PATHS in nresult):
3095 for path_result in nresult[constants.NV_OOB_PATHS]:
3096 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3098 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3099 """Verifies and updates the node volume data.
3101 This function will update a L{NodeImage}'s internal structures
3102 with data from the remote call.
3104 @type ninfo: L{objects.Node}
3105 @param ninfo: the node to check
3106 @param nresult: the remote results for the node
3107 @param nimg: the node image object
3108 @param vg_name: the configured VG name
3112 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3114 nimg.lvm_fail = True
3115 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3118 elif isinstance(lvdata, basestring):
3119 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3120 utils.SafeEncode(lvdata))
3121 elif not isinstance(lvdata, dict):
3122 _ErrorIf(True, constants.CV_ENODELVM, node,
3123 "rpc call to node failed (lvlist)")
3125 nimg.volumes = lvdata
3126 nimg.lvm_fail = False
3128 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3129 """Verifies and updates the node instance list.
3131 If the listing was successful, then updates this node's instance
3132 list. Otherwise, it marks the RPC call as failed for the instance list.
3135 @type ninfo: L{objects.Node}
3136 @param ninfo: the node to check
3137 @param nresult: the remote results for the node
3138 @param nimg: the node image object
3141 idata = nresult.get(constants.NV_INSTANCELIST, None)
3142 test = not isinstance(idata, list)
3143 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3144 "rpc call to node failed (instancelist): %s",
3145 utils.SafeEncode(str(idata)))
3147 nimg.hyp_fail = True
3149 nimg.instances = idata
3151 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3152 """Verifies and computes a node information map
3154 @type ninfo: L{objects.Node}
3155 @param ninfo: the node to check
3156 @param nresult: the remote results for the node
3157 @param nimg: the node image object
3158 @param vg_name: the configured VG name
3162 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3164 # try to read free memory (from the hypervisor)
3165 hv_info = nresult.get(constants.NV_HVINFO, None)
3166 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3167 _ErrorIf(test, constants.CV_ENODEHV, node,
3168 "rpc call to node failed (hvinfo)")
3171 nimg.mfree = int(hv_info["memory_free"])
3172 except (ValueError, TypeError):
3173 _ErrorIf(True, constants.CV_ENODERPC, node,
3174 "node returned invalid nodeinfo, check hypervisor")
3176 # FIXME: devise a free space model for file based instances as well
3177 if vg_name is not None:
3178 test = (constants.NV_VGLIST not in nresult or
3179 vg_name not in nresult[constants.NV_VGLIST])
3180 _ErrorIf(test, constants.CV_ENODELVM, node,
3181 "node didn't return data for the volume group '%s'"
3182 " - it is either missing or broken", vg_name)
3185 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3186 except (ValueError, TypeError):
3187 _ErrorIf(True, constants.CV_ENODERPC, node,
3188 "node returned invalid LVM info, check LVM status")
3190 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3191 """Gets per-disk status information for all instances.
3193 @type nodelist: list of strings
3194 @param nodelist: Node names
3195 @type node_image: dict of (name, L{objects.Node})
3196 @param node_image: Node objects
3197 @type instanceinfo: dict of (name, L{objects.Instance})
3198 @param instanceinfo: Instance objects
3199 @rtype: {instance: {node: [(success, payload)]}}
3200 @return: a dictionary of per-instance dictionaries with nodes as
3201 keys and disk information as values; the disk information is a
3202 list of tuples (success, payload)
3205 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3208 node_disks_devonly = {}
3209 diskless_instances = set()
3210 diskless = constants.DT_DISKLESS
3212 for nname in nodelist:
3213 node_instances = list(itertools.chain(node_image[nname].pinst,
3214 node_image[nname].sinst))
3215 diskless_instances.update(inst for inst in node_instances
3216 if instanceinfo[inst].disk_template == diskless)
3217 disks = [(inst, disk)
3218 for inst in node_instances
3219 for disk in instanceinfo[inst].disks]
3222 # No need to collect data
3225 node_disks[nname] = disks
3227 # _AnnotateDiskParams already makes copies of the disks
3229 for (inst, dev) in disks:
3230 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3231 self.cfg.SetDiskID(anno_disk, nname)
3232 devonly.append(anno_disk)
3234 node_disks_devonly[nname] = devonly
3236 assert len(node_disks) == len(node_disks_devonly)
3238 # Collect data from all nodes with disks
3239 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3242 assert len(result) == len(node_disks)
3246 for (nname, nres) in result.items():
3247 disks = node_disks[nname]
3250 # No data from this node
3251 data = len(disks) * [(False, "node offline")]
3254 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3255 "while getting disk information: %s", msg)
3257 # No data from this node
3258 data = len(disks) * [(False, msg)]
3261 for idx, i in enumerate(nres.payload):
3262 if isinstance(i, (tuple, list)) and len(i) == 2:
3265 logging.warning("Invalid result from node %s, entry %d: %s",
3267 data.append((False, "Invalid result from the remote node"))
3269 for ((inst, _), status) in zip(disks, data):
3270 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3272 # Add empty entries for diskless instances.
3273 for inst in diskless_instances:
3274 assert inst not in instdisk
3277 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3278 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3279 compat.all(isinstance(s, (tuple, list)) and
3280 len(s) == 2 for s in statuses)
3281 for inst, nnames in instdisk.items()
3282 for nname, statuses in nnames.items())
3284 instdisk_keys = set(instdisk)
3285 instanceinfo_keys = set(instanceinfo)
3286 assert instdisk_keys == instanceinfo_keys, \
3287 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3288 (instdisk_keys, instanceinfo_keys))
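# Illustrative sketch (editorial addition, hypothetical names): the mapping
# returned by _CollectDiskInfo has one entry per instance, e.g.
#   instdisk["inst1"] = {
#     "node1": [(True, status_disk0), (True, status_disk1)],
#     "node2": [(False, "node offline"), (False, "node offline")],
#   }
# with one (success, payload) pair per configured disk and per node.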
3293 def _SshNodeSelector(group_uuid, all_nodes):
3294 """Create endless iterators for all potential SSH check hosts.
3297 nodes = [node for node in all_nodes
3298 if (node.group != group_uuid and
3300 keyfunc = operator.attrgetter("group")
3302 return map(itertools.cycle,
3303 [sorted(map(operator.attrgetter("name"), names))
3304 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3308 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3309 """Choose which nodes should talk to which other nodes.
3311 We will make nodes contact all nodes in their group, and one node from every other group.
3314 @warning: This algorithm has a known issue if one node group is much
3315 smaller than others (e.g. just one node). In such a case all other
3316 nodes will talk to the single node.
3319 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3320 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3322 return (online_nodes,
3323 dict((name, sorted([i.next() for i in sel]))
3324 for name in online_nodes))
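# Illustrative sketch (editorial addition, hypothetical node names): with the
# verified group G1 = {n1, n2} and another group G2 = {n3, n4}, the mapping
# returned above asks every online node of G1 to contact one node from each
# other group, e.g. {"n1": ["n3"], "n2": ["n4"]}, cycling through the other
# groups' members so the checks are spread.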
3326 def BuildHooksEnv(self):
3329 Cluster-Verify hooks are run only in the post phase; their failure is
3330 logged in the verify output and makes the verification fail.
3334 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3337 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3338 for node in self.my_node_info.values())
3342 def BuildHooksNodes(self):
3343 """Build hooks nodes.
3346 return ([], self.my_node_names)
3348 def Exec(self, feedback_fn):
3349 """Verify integrity of the node group, performing various test on nodes.
3352 # This method has too many local variables. pylint: disable=R0914
3353 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3355 if not self.my_node_names:
3357 feedback_fn("* Empty node group, skipping verification")
3361 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3362 verbose = self.op.verbose
3363 self._feedback_fn = feedback_fn
3365 vg_name = self.cfg.GetVGName()
3366 drbd_helper = self.cfg.GetDRBDHelper()
3367 cluster = self.cfg.GetClusterInfo()
3368 hypervisors = cluster.enabled_hypervisors
3369 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3371 i_non_redundant = [] # Non redundant instances
3372 i_non_a_balanced = [] # Non auto-balanced instances
3373 i_offline = 0 # Count of offline instances
3374 n_offline = 0 # Count of offline nodes
3375 n_drained = 0 # Count of nodes being drained
3376 node_vol_should = {}
3378 # FIXME: verify OS list
3381 filemap = _ComputeAncillaryFiles(cluster, False)
3383 # do local checksums
3384 master_node = self.master_node = self.cfg.GetMasterNode()
3385 master_ip = self.cfg.GetMasterIP()
3387 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3390 if self.cfg.GetUseExternalMipScript():
3391 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3393 node_verify_param = {
3394 constants.NV_FILELIST:
3395 map(vcluster.MakeVirtualPath,
3396 utils.UniqueSequence(filename
3397 for files in filemap
3398 for filename in files)),
3399 constants.NV_NODELIST:
3400 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3401 self.all_node_info.values()),
3402 constants.NV_HYPERVISOR: hypervisors,
3403 constants.NV_HVPARAMS:
3404 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3405 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3406 for node in node_data_list
3407 if not node.offline],
3408 constants.NV_INSTANCELIST: hypervisors,
3409 constants.NV_VERSION: None,
3410 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3411 constants.NV_NODESETUP: None,
3412 constants.NV_TIME: None,
3413 constants.NV_MASTERIP: (master_node, master_ip),
3414 constants.NV_OSLIST: None,
3415 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3416 constants.NV_USERSCRIPTS: user_scripts,
3419 if vg_name is not None:
3420 node_verify_param[constants.NV_VGLIST] = None
3421 node_verify_param[constants.NV_LVLIST] = vg_name
3422 node_verify_param[constants.NV_PVLIST] = [vg_name]
3425 node_verify_param[constants.NV_DRBDLIST] = None
3426 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3428 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3429 # Load file storage paths only from master node
3430 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3433 # FIXME: this needs to be changed per node-group, not cluster-wide
3435 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3436 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3437 bridges.add(default_nicpp[constants.NIC_LINK])
3438 for instance in self.my_inst_info.values():
3439 for nic in instance.nics:
3440 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3441 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(full_nic[constants.NIC_LINK])
3445 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3447 # Build our expected cluster state
3448 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3450 vm_capable=node.vm_capable))
3451 for node in node_data_list)
3455 for node in self.all_node_info.values():
3456 path = _SupportsOob(self.cfg, node)
3457 if path and path not in oob_paths:
3458 oob_paths.append(path)
3461 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3463 for instance in self.my_inst_names:
3464 inst_config = self.my_inst_info[instance]
3465 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3468 for nname in inst_config.all_nodes:
3469 if nname not in node_image:
3470 gnode = self.NodeImage(name=nname)
3471 gnode.ghost = (nname not in self.all_node_info)
3472 node_image[nname] = gnode
3474 inst_config.MapLVsByNode(node_vol_should)
3476 pnode = inst_config.primary_node
3477 node_image[pnode].pinst.append(instance)
3479 for snode in inst_config.secondary_nodes:
3480 nimg = node_image[snode]
3481 nimg.sinst.append(instance)
3482 if pnode not in nimg.sbp:
3483 nimg.sbp[pnode] = []
3484 nimg.sbp[pnode].append(instance)
3486 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3488 # The value of exclusive_storage should be the same across the group, so if
3489 # it's True for at least one node, we act as if it were set for all the nodes
3490 self._exclusive_storage = compat.any(es_flags.values())
3491 if self._exclusive_storage:
3492 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3493 es_unset_nodes = [n for (n, es) in es_flags.items()
3497 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3498 "The exclusive_storage flag should be uniform in a group,"
3499 " but these nodes have it unset: %s",
3500 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3501 self.LogWarning("Some checks required by exclusive storage will be"
3502 " performed also on nodes with the flag unset")
3504 # At this point, we have the in-memory data structures complete,
3505 # except for the runtime information, which we'll gather next
3507 # Due to the way our RPC system works, exact response times cannot be
3508 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3509 # time before and after executing the request, we can at least have a time window.
3511 nvinfo_starttime = time.time()
3512 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3514 self.cfg.GetClusterName())
3515 nvinfo_endtime = time.time()
3517 if self.extra_lv_nodes and vg_name is not None:
3519 self.rpc.call_node_verify(self.extra_lv_nodes,
3520 {constants.NV_LVLIST: vg_name},
3521 self.cfg.GetClusterName())
3523 extra_lv_nvinfo = {}
3525 all_drbd_map = self.cfg.ComputeDRBDMap()
3527 feedback_fn("* Gathering disk information (%s nodes)" %
3528 len(self.my_node_names))
3529 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3532 feedback_fn("* Verifying configuration file consistency")
3534 # If not all nodes are being checked, we need to make sure the master node
3535 # and a non-checked vm_capable node are in the list.
3536 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3538 vf_nvinfo = all_nvinfo.copy()
3539 vf_node_info = list(self.my_node_info.values())
3540 additional_nodes = []
3541 if master_node not in self.my_node_info:
3542 additional_nodes.append(master_node)
3543 vf_node_info.append(self.all_node_info[master_node])
3544 # Add the first vm_capable node we find which is not included,
3545 # excluding the master node (which we already have)
3546 for node in absent_nodes:
3547 nodeinfo = self.all_node_info[node]
3548 if (nodeinfo.vm_capable and not nodeinfo.offline and
3549 node != master_node):
3550 additional_nodes.append(node)
3551 vf_node_info.append(self.all_node_info[node])
3553 key = constants.NV_FILELIST
3554 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3555 {key: node_verify_param[key]},
3556 self.cfg.GetClusterName()))
3558 vf_nvinfo = all_nvinfo
3559 vf_node_info = self.my_node_info.values()
3561 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3563 feedback_fn("* Verifying node status")
3567 for node_i in node_data_list:
3569 nimg = node_image[node]
3573 feedback_fn("* Skipping offline node %s" % (node,))
3577 if node == master_node:
3579 elif node_i.master_candidate:
3580 ntype = "master candidate"
3581 elif node_i.drained:
3587 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3589 msg = all_nvinfo[node].fail_msg
3590 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3593 nimg.rpc_fail = True
3596 nresult = all_nvinfo[node].payload
3598 nimg.call_ok = self._VerifyNode(node_i, nresult)
3599 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3600 self._VerifyNodeNetwork(node_i, nresult)
3601 self._VerifyNodeUserScripts(node_i, nresult)
3602 self._VerifyOob(node_i, nresult)
3603 self._VerifyFileStoragePaths(node_i, nresult,
3604 node == master_node)
3607 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3608 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3611 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3612 self._UpdateNodeInstances(node_i, nresult, nimg)
3613 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3614 self._UpdateNodeOS(node_i, nresult, nimg)
3616 if not nimg.os_fail:
3617 if refos_img is None:
3619 self._VerifyNodeOS(node_i, nimg, refos_img)
3620 self._VerifyNodeBridges(node_i, nresult, bridges)
3622 # Check whether all running instances are primary for the node. (This
3623 # can no longer be done from _VerifyInstance below, since some of the
3624 # wrong instances could be from other node groups.)
3625 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3627 for inst in non_primary_inst:
3628 test = inst in self.all_inst_info
3629 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3630 "instance should not run on node %s", node_i.name)
3631 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3632 "node is running unknown instance %s", inst)
3634 self._VerifyGroupLVM(node_image, vg_name)
3636 for node, result in extra_lv_nvinfo.items():
3637 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3638 node_image[node], vg_name)
3640 feedback_fn("* Verifying instance status")
3641 for instance in self.my_inst_names:
3643 feedback_fn("* Verifying instance %s" % instance)
3644 inst_config = self.my_inst_info[instance]
3645 self._VerifyInstance(instance, inst_config, node_image,
3648 # If the instance is non-redundant we cannot survive losing its primary
3649 # node, so we are not N+1 compliant.
3650 if inst_config.disk_template not in constants.DTS_MIRRORED:
3651 i_non_redundant.append(instance)
3653 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3654 i_non_a_balanced.append(instance)
3656 feedback_fn("* Verifying orphan volumes")
3657 reserved = utils.FieldSet(*cluster.reserved_lvs)
3659 # We will get spurious "unknown volume" warnings if any node of this group
3660 # is secondary for an instance whose primary is in another group. To avoid
3661 # them, we find these instances and add their volumes to node_vol_should.
3662 for inst in self.all_inst_info.values():
3663 for secondary in inst.secondary_nodes:
3664 if (secondary in self.my_node_info
3665 and inst.name not in self.my_inst_info):
3666 inst.MapLVsByNode(node_vol_should)
3669 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3671 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3672 feedback_fn("* Verifying N+1 Memory redundancy")
3673 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3675 feedback_fn("* Other Notes")
3677 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3678 % len(i_non_redundant))
3680 if i_non_a_balanced:
3681 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3682 % len(i_non_a_balanced))
3685 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3688 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3691 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3695 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3696 """Analyze the post-hooks' result
3698 This method analyses the hook result, handles it, and sends some
3699 nicely-formatted feedback back to the user.
3701 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3702 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3703 @param hooks_results: the results of the multi-node hooks rpc call
3704 @param feedback_fn: function used to send feedback back to the caller
3705 @param lu_result: previous Exec result
3706 @return: the new Exec result, based on the previous result
3710 # We only really run POST phase hooks, only for non-empty groups,
3711 # and are only interested in their results
3712 if not self.my_node_names:
3715 elif phase == constants.HOOKS_PHASE_POST:
3716 # Used to change hooks' output to proper indentation
3717 feedback_fn("* Hooks Results")
3718 assert hooks_results, "invalid result from hooks"
3720 for node_name in hooks_results:
3721 res = hooks_results[node_name]
3723 test = msg and not res.offline
3724 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3725 "Communication failure in hooks execution: %s", msg)
3726 if res.offline or msg:
3727 # No need to investigate payload if node is offline or gave
3730 for script, hkr, output in res.payload:
3731 test = hkr == constants.HKR_FAIL
3732 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3733 "Script %s failed, output:", script)
3735 output = self._HOOKS_INDENT_RE.sub(" ", output)
3736 feedback_fn("%s" % output)
3742 class LUClusterVerifyDisks(NoHooksLU):
3743 """Verifies the cluster disks status.
3748 def ExpandNames(self):
3749 self.share_locks = _ShareAll()
3750 self.needed_locks = {
3751 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3754 def Exec(self, feedback_fn):
3755 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3757 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3758 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3759 for group in group_names])
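# Illustrative sketch (hypothetical group names): with two node groups named
# "default" and "storage", the Exec method above would return roughly
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
# i.e. one single-opcode job per node group.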
3762 class LUGroupVerifyDisks(NoHooksLU):
3763 """Verifies the status of all disks in a node group.
3768 def ExpandNames(self):
3769 # Raises errors.OpPrereqError on its own if group can't be found
3770 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3772 self.share_locks = _ShareAll()
3773 self.needed_locks = {
3774 locking.LEVEL_INSTANCE: [],
3775 locking.LEVEL_NODEGROUP: [],
3776 locking.LEVEL_NODE: [],
3778 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3779 # starts one instance of this opcode for every group, which means all
3780 # nodes will be locked for a short amount of time, so it's better to
3781 # acquire the node allocation lock as well.
3782 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3785 def DeclareLocks(self, level):
3786 if level == locking.LEVEL_INSTANCE:
3787 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3789 # Lock instances optimistically, needs verification once node and group
3790 # locks have been acquired
3791 self.needed_locks[locking.LEVEL_INSTANCE] = \
3792 self.cfg.GetNodeGroupInstances(self.group_uuid)
3794 elif level == locking.LEVEL_NODEGROUP:
3795 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3797 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3798 set([self.group_uuid] +
3799 # Lock all groups used by instances optimistically; this requires
3800 # going via the node before it's locked, requiring verification
3803 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3804 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3806 elif level == locking.LEVEL_NODE:
3807 # This will only lock the nodes in the group to be verified which contain
3809 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3810 self._LockInstancesNodes()
3812 # Lock all nodes in group to be verified
3813 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3814 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3815 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3817 def CheckPrereq(self):
3818 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3819 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3820 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3822 assert self.group_uuid in owned_groups
3824 # Check if locked instances are still correct
3825 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3827 # Get instance information
3828 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3830 # Check if node groups for locked instances are still correct
3831 _CheckInstancesNodeGroups(self.cfg, self.instances,
3832 owned_groups, owned_nodes, self.group_uuid)
3834 def Exec(self, feedback_fn):
3835 """Verify integrity of cluster disks.
3837 @rtype: tuple of three items
3838 @return: a tuple of (dict of node-to-node_error, list of instances
3839 which need activate-disks, dict of instance: (node, volume) for
3844 res_instances = set()
3847 nv_dict = _MapInstanceDisksToNodes(
3848 [inst for inst in self.instances.values()
3849 if inst.admin_state == constants.ADMINST_UP])
3852 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3853 set(self.cfg.GetVmCapableNodeList()))
3855 node_lvs = self.rpc.call_lv_list(nodes, [])
3857 for (node, node_res) in node_lvs.items():
3858 if node_res.offline:
3861 msg = node_res.fail_msg
3863 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3864 res_nodes[node] = msg
3867 for lv_name, (_, _, lv_online) in node_res.payload.items():
3868 inst = nv_dict.pop((node, lv_name), None)
3869 if not (lv_online or inst is None):
3870 res_instances.add(inst)
3872 # any leftover items in nv_dict are missing LVs, let's arrange the data
3874 for key, inst in nv_dict.iteritems():
3875 res_missing.setdefault(inst, []).append(list(key))
3877 return (res_nodes, list(res_instances), res_missing)
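# Illustrative return value of LUGroupVerifyDisks.Exec (hypothetical node,
# instance and LV names), matching the tuple described in the docstring above:
#   ({"node2.example.com": "Error enumerating LVs: ..."},    # per-node errors
#    ["instance1"],                                          # need activate-disks
#    {"instance2": [["node3.example.com", "xenvg/disk0"]]})  # missing LVs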
3880 class LUClusterRepairDiskSizes(NoHooksLU):
3881 """Verifies the cluster disks sizes.
3886 def ExpandNames(self):
3887 if self.op.instances:
3888 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3889 # Not getting the node allocation lock as only a specific set of
3890 # instances (and their nodes) is going to be acquired
3891 self.needed_locks = {
3892 locking.LEVEL_NODE_RES: [],
3893 locking.LEVEL_INSTANCE: self.wanted_names,
3895 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3897 self.wanted_names = None
3898 self.needed_locks = {
3899 locking.LEVEL_NODE_RES: locking.ALL_SET,
3900 locking.LEVEL_INSTANCE: locking.ALL_SET,
3902 # This opcode acquires the node locks for all instances
3903 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3906 self.share_locks = {
3907 locking.LEVEL_NODE_RES: 1,
3908 locking.LEVEL_INSTANCE: 0,
3909 locking.LEVEL_NODE_ALLOC: 1,
3912 def DeclareLocks(self, level):
3913 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3914 self._LockInstancesNodes(primary_only=True, level=level)
3916 def CheckPrereq(self):
3917 """Check prerequisites.
3919 This only checks the optional instance list against the existing names.
3922 if self.wanted_names is None:
3923 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3925 self.wanted_instances = \
3926 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3928 def _EnsureChildSizes(self, disk):
3929 """Ensure children of the disk have the needed disk size.
3931 This is valid mainly for DRBD8 and fixes an issue where the
3932 children have a smaller disk size.
3934 @param disk: an L{ganeti.objects.Disk} object
3937 if disk.dev_type == constants.LD_DRBD8:
3938 assert disk.children, "Empty children for DRBD8?"
3939 fchild = disk.children[0]
3940 mismatch = fchild.size < disk.size
3942 self.LogInfo("Child disk has size %d, parent %d, fixing",
3943 fchild.size, disk.size)
3944 fchild.size = disk.size
3946 # and we recurse on this child only, not on the metadev
3947 return self._EnsureChildSizes(fchild) or mismatch
3951 def Exec(self, feedback_fn):
3952 """Verify the size of cluster disks.
3955 # TODO: check child disks too
3956 # TODO: check differences in size between primary/secondary nodes
3958 for instance in self.wanted_instances:
3959 pnode = instance.primary_node
3960 if pnode not in per_node_disks:
3961 per_node_disks[pnode] = []
3962 for idx, disk in enumerate(instance.disks):
3963 per_node_disks[pnode].append((instance, idx, disk))
3965 assert not (frozenset(per_node_disks.keys()) -
3966 self.owned_locks(locking.LEVEL_NODE_RES)), \
3967 "Not owning correct locks"
3968 assert not self.owned_locks(locking.LEVEL_NODE)
3971 for node, dskl in per_node_disks.items():
3972 newl = [v[2].Copy() for v in dskl]
3974 self.cfg.SetDiskID(dsk, node)
3975 result = self.rpc.call_blockdev_getsize(node, newl)
3977 self.LogWarning("Failure in blockdev_getsize call to node"
3978 " %s, ignoring", node)
3980 if len(result.payload) != len(dskl):
3981 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3982 " result.payload=%s", node, len(dskl), result.payload)
3983 self.LogWarning("Invalid result from node %s, ignoring node results",
3986 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3988 self.LogWarning("Disk %d of instance %s did not return size"
3989 " information, ignoring", idx, instance.name)
3991 if not isinstance(size, (int, long)):
3992 self.LogWarning("Disk %d of instance %s did not return valid"
3993 " size information, ignoring", idx, instance.name)
3996 if size != disk.size:
3997 self.LogInfo("Disk %d of instance %s has mismatched size,"
3998 " correcting: recorded %d, actual %d", idx,
3999 instance.name, disk.size, size)
4001 self.cfg.Update(instance, feedback_fn)
4002 changed.append((instance.name, idx, size))
4003 if self._EnsureChildSizes(disk):
4004 self.cfg.Update(instance, feedback_fn)
4005 changed.append((instance.name, idx, disk.size))
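# Illustrative shape of the "changed" list returned by Exec (hypothetical
# values): [("instance1", 0, 20480)], i.e. one (instance name, disk index,
# updated size) tuple per corrected disk.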
4009 class LUClusterRename(LogicalUnit):
4010 """Rename the cluster.
4013 HPATH = "cluster-rename"
4014 HTYPE = constants.HTYPE_CLUSTER
4016 def BuildHooksEnv(self):
4021 "OP_TARGET": self.cfg.GetClusterName(),
4022 "NEW_NAME": self.op.name,
4025 def BuildHooksNodes(self):
4026 """Build hooks nodes.
4029 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4031 def CheckPrereq(self):
4032 """Verify that the passed name is a valid one.
4035 hostname = netutils.GetHostname(name=self.op.name,
4036 family=self.cfg.GetPrimaryIPFamily())
4038 new_name = hostname.name
4039 self.ip = new_ip = hostname.ip
4040 old_name = self.cfg.GetClusterName()
4041 old_ip = self.cfg.GetMasterIP()
4042 if new_name == old_name and new_ip == old_ip:
4043 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4044 " cluster has changed",
4046 if new_ip != old_ip:
4047 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4048 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4049 " reachable on the network" %
4050 new_ip, errors.ECODE_NOTUNIQUE)
4052 self.op.name = new_name
4054 def Exec(self, feedback_fn):
4055 """Rename the cluster.
4058 clustername = self.op.name
4061 # shut down the master IP
4062 master_params = self.cfg.GetMasterNetworkParameters()
4063 ems = self.cfg.GetUseExternalMipScript()
4064 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4066 result.Raise("Could not disable the master role")
4069 cluster = self.cfg.GetClusterInfo()
4070 cluster.cluster_name = clustername
4071 cluster.master_ip = new_ip
4072 self.cfg.Update(cluster, feedback_fn)
4074 # update the known hosts file
4075 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4076 node_list = self.cfg.GetOnlineNodeList()
4078 node_list.remove(master_params.name)
4081 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4083 master_params.ip = new_ip
4084 result = self.rpc.call_node_activate_master_ip(master_params.name,
4086 msg = result.fail_msg
4088 self.LogWarning("Could not re-enable the master role on"
4089 " the master, please restart manually: %s", msg)
4094 def _ValidateNetmask(cfg, netmask):
4095 """Checks if a netmask is valid.
4097 @type cfg: L{config.ConfigWriter}
4098 @param cfg: The cluster configuration
4100 @param netmask: the netmask to be verified
4101 @raise errors.OpPrereqError: if the validation fails
4104 ip_family = cfg.GetPrimaryIPFamily()
4106 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4107 except errors.ProgrammerError:
4108 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4109 ip_family, errors.ECODE_INVAL)
4110 if not ipcls.ValidateNetmask(netmask):
4111 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4112 (netmask), errors.ECODE_INVAL)
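# Illustrative use of _ValidateNetmask (hypothetical values): on a cluster
# whose primary IP family is IPv4, a CIDR prefix length such as 24 is
# accepted, while a value outside the valid range (e.g. 99) raises
# errors.OpPrereqError.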
4115 class LUClusterSetParams(LogicalUnit):
4116 """Change the parameters of the cluster.
4119 HPATH = "cluster-modify"
4120 HTYPE = constants.HTYPE_CLUSTER
4123 def CheckArguments(self):
4127 if self.op.uid_pool:
4128 uidpool.CheckUidPool(self.op.uid_pool)
4130 if self.op.add_uids:
4131 uidpool.CheckUidPool(self.op.add_uids)
4133 if self.op.remove_uids:
4134 uidpool.CheckUidPool(self.op.remove_uids)
4136 if self.op.master_netmask is not None:
4137 _ValidateNetmask(self.cfg, self.op.master_netmask)
4139 if self.op.diskparams:
4140 for dt_params in self.op.diskparams.values():
4141 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4143 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4144 except errors.OpPrereqError, err:
4145 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4148 def ExpandNames(self):
4149 # FIXME: in the future maybe other cluster params won't require checking on
4150 # all nodes to be modified.
4151 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4152 # resource locks the right thing, shouldn't it be the BGL instead?
4153 self.needed_locks = {
4154 locking.LEVEL_NODE: locking.ALL_SET,
4155 locking.LEVEL_INSTANCE: locking.ALL_SET,
4156 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4157 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4159 self.share_locks = _ShareAll()
4161 def BuildHooksEnv(self):
4166 "OP_TARGET": self.cfg.GetClusterName(),
4167 "NEW_VG_NAME": self.op.vg_name,
4170 def BuildHooksNodes(self):
4171 """Build hooks nodes.
4174 mn = self.cfg.GetMasterNode()
4177 def CheckPrereq(self):
4178 """Check prerequisites.
4180 This checks that the given parameters don't conflict and that
4181 the given volume group is valid.
4184 if self.op.vg_name is not None and not self.op.vg_name:
4185 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4186 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4187 " instances exist", errors.ECODE_INVAL)
4189 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4190 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4191 raise errors.OpPrereqError("Cannot disable drbd helper while"
4192 " drbd-based instances exist",
4195 node_list = self.owned_locks(locking.LEVEL_NODE)
4197 # if vg_name is not None, check the given volume group on all nodes
4199 vglist = self.rpc.call_vg_list(node_list)
4200 for node in node_list:
4201 msg = vglist[node].fail_msg
4203 # ignoring down node
4204 self.LogWarning("Error while gathering data on node %s"
4205 " (ignoring node): %s", node, msg)
4207 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4209 constants.MIN_VG_SIZE)
4211 raise errors.OpPrereqError("Error on node '%s': %s" %
4212 (node, vgstatus), errors.ECODE_ENVIRON)
4214 if self.op.drbd_helper:
4215 # check the given drbd helper on all nodes
4216 helpers = self.rpc.call_drbd_helper(node_list)
4217 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4219 self.LogInfo("Not checking drbd helper on offline node %s", node)
4221 msg = helpers[node].fail_msg
4223 raise errors.OpPrereqError("Error checking drbd helper on node"
4224 " '%s': %s" % (node, msg),
4225 errors.ECODE_ENVIRON)
4226 node_helper = helpers[node].payload
4227 if node_helper != self.op.drbd_helper:
4228 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4229 (node, node_helper), errors.ECODE_ENVIRON)
4231 self.cluster = cluster = self.cfg.GetClusterInfo()
4232 # validate params changes
4233 if self.op.beparams:
4234 objects.UpgradeBeParams(self.op.beparams)
4235 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4236 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4238 if self.op.ndparams:
4239 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4240 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4242 # TODO: we need a more general way to handle resetting
4243 # cluster-level parameters to default values
4244 if self.new_ndparams["oob_program"] == "":
4245 self.new_ndparams["oob_program"] = \
4246 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4248 if self.op.hv_state:
4249 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4250 self.cluster.hv_state_static)
4251 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4252 for hv, values in new_hv_state.items())
4254 if self.op.disk_state:
4255 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4256 self.cluster.disk_state_static)
4257 self.new_disk_state = \
4258 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4259 for name, values in svalues.items()))
4260 for storage, svalues in new_disk_state.items())
4263 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4266 all_instances = self.cfg.GetAllInstancesInfo().values()
4268 for group in self.cfg.GetAllNodeGroupsInfo().values():
4269 instances = frozenset([inst for inst in all_instances
4270 if compat.any(node in group.members
4271 for node in inst.all_nodes)])
4272 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4273 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4274 new = _ComputeNewInstanceViolations(ipol,
4275 new_ipolicy, instances)
4277 violations.update(new)
4280 self.LogWarning("After the ipolicy change the following instances"
4281 " violate them: %s",
4282 utils.CommaJoin(utils.NiceSort(violations)))
4284 if self.op.nicparams:
4285 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4286 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4287 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4290 # check all instances for consistency
4291 for instance in self.cfg.GetAllInstancesInfo().values():
4292 for nic_idx, nic in enumerate(instance.nics):
4293 params_copy = copy.deepcopy(nic.nicparams)
4294 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4296 # check parameter syntax
4298 objects.NIC.CheckParameterSyntax(params_filled)
4299 except errors.ConfigurationError, err:
4300 nic_errors.append("Instance %s, nic/%d: %s" %
4301 (instance.name, nic_idx, err))
4303 # if we're moving instances to routed, check that they have an ip
4304 target_mode = params_filled[constants.NIC_MODE]
4305 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4306 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4307 " address" % (instance.name, nic_idx))
4309 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4310 "\n".join(nic_errors), errors.ECODE_INVAL)
4312 # hypervisor list/parameters
4313 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4314 if self.op.hvparams:
4315 for hv_name, hv_dict in self.op.hvparams.items():
4316 if hv_name not in self.new_hvparams:
4317 self.new_hvparams[hv_name] = hv_dict
4319 self.new_hvparams[hv_name].update(hv_dict)
4321 # disk template parameters
4322 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4323 if self.op.diskparams:
4324 for dt_name, dt_params in self.op.diskparams.items():
4325 if dt_name not in self.new_diskparams:
4326 self.new_diskparams[dt_name] = dt_params
4328 self.new_diskparams[dt_name].update(dt_params)
4330 # os hypervisor parameters
4331 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4333 for os_name, hvs in self.op.os_hvp.items():
4334 if os_name not in self.new_os_hvp:
4335 self.new_os_hvp[os_name] = hvs
4337 for hv_name, hv_dict in hvs.items():
4339 # Delete if it exists
4340 self.new_os_hvp[os_name].pop(hv_name, None)
4341 elif hv_name not in self.new_os_hvp[os_name]:
4342 self.new_os_hvp[os_name][hv_name] = hv_dict
4344 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4347 self.new_osp = objects.FillDict(cluster.osparams, {})
4348 if self.op.osparams:
4349 for os_name, osp in self.op.osparams.items():
4350 if os_name not in self.new_osp:
4351 self.new_osp[os_name] = {}
4353 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4356 if not self.new_osp[os_name]:
4357 # we removed all parameters
4358 del self.new_osp[os_name]
4360 # check the parameter validity (remote check)
4361 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4362 os_name, self.new_osp[os_name])
4364 # changes to the hypervisor list
4365 if self.op.enabled_hypervisors is not None:
4366 self.hv_list = self.op.enabled_hypervisors
4367 for hv in self.hv_list:
4368 # if the hypervisor doesn't already exist in the cluster
4369 # hvparams, we initialize it to empty, and then (in both
4370 # cases) we make sure to fill the defaults, as we might not
4371 # have a complete defaults list if the hypervisor wasn't
4373 if hv not in new_hvp:
4375 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4376 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4378 self.hv_list = cluster.enabled_hypervisors
4380 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4381 # either the enabled list has changed, or the parameters have, validate
4382 for hv_name, hv_params in self.new_hvparams.items():
4383 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4384 (self.op.enabled_hypervisors and
4385 hv_name in self.op.enabled_hypervisors)):
4386 # either this is a new hypervisor, or its parameters have changed
4387 hv_class = hypervisor.GetHypervisorClass(hv_name)
4388 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4389 hv_class.CheckParameterSyntax(hv_params)
4390 _CheckHVParams(self, node_list, hv_name, hv_params)
4393 # no need to check any newly-enabled hypervisors, since the
4394 # defaults have already been checked in the above code-block
4395 for os_name, os_hvp in self.new_os_hvp.items():
4396 for hv_name, hv_params in os_hvp.items():
4397 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4398 # we need to fill in the new os_hvp on top of the actual hv_p
4399 cluster_defaults = self.new_hvparams.get(hv_name, {})
4400 new_osp = objects.FillDict(cluster_defaults, hv_params)
4401 hv_class = hypervisor.GetHypervisorClass(hv_name)
4402 hv_class.CheckParameterSyntax(new_osp)
4403 _CheckHVParams(self, node_list, hv_name, new_osp)
4405 if self.op.default_iallocator:
4406 alloc_script = utils.FindFile(self.op.default_iallocator,
4407 constants.IALLOCATOR_SEARCH_PATH,
4409 if alloc_script is None:
4410 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4411 " specified" % self.op.default_iallocator,
4414 def Exec(self, feedback_fn):
4415 """Change the parameters of the cluster.
4418 if self.op.vg_name is not None:
4419 new_volume = self.op.vg_name
4422 if new_volume != self.cfg.GetVGName():
4423 self.cfg.SetVGName(new_volume)
4425 feedback_fn("Cluster LVM configuration already in desired"
4426 " state, not changing")
4427 if self.op.drbd_helper is not None:
4428 new_helper = self.op.drbd_helper
4431 if new_helper != self.cfg.GetDRBDHelper():
4432 self.cfg.SetDRBDHelper(new_helper)
4434 feedback_fn("Cluster DRBD helper already in desired state,"
4436 if self.op.hvparams:
4437 self.cluster.hvparams = self.new_hvparams
4439 self.cluster.os_hvp = self.new_os_hvp
4440 if self.op.enabled_hypervisors is not None:
4441 self.cluster.hvparams = self.new_hvparams
4442 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4443 if self.op.beparams:
4444 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4445 if self.op.nicparams:
4446 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4448 self.cluster.ipolicy = self.new_ipolicy
4449 if self.op.osparams:
4450 self.cluster.osparams = self.new_osp
4451 if self.op.ndparams:
4452 self.cluster.ndparams = self.new_ndparams
4453 if self.op.diskparams:
4454 self.cluster.diskparams = self.new_diskparams
4455 if self.op.hv_state:
4456 self.cluster.hv_state_static = self.new_hv_state
4457 if self.op.disk_state:
4458 self.cluster.disk_state_static = self.new_disk_state
4460 if self.op.candidate_pool_size is not None:
4461 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4462 # we need to update the pool size here, otherwise the save will fail
4463 _AdjustCandidatePool(self, [])
4465 if self.op.maintain_node_health is not None:
4466 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4467 feedback_fn("Note: CONFD was disabled at build time, node health"
4468 " maintenance is not useful (still enabling it)")
4469 self.cluster.maintain_node_health = self.op.maintain_node_health
4471 if self.op.prealloc_wipe_disks is not None:
4472 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4474 if self.op.add_uids is not None:
4475 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4477 if self.op.remove_uids is not None:
4478 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4480 if self.op.uid_pool is not None:
4481 self.cluster.uid_pool = self.op.uid_pool
4483 if self.op.default_iallocator is not None:
4484 self.cluster.default_iallocator = self.op.default_iallocator
4486 if self.op.reserved_lvs is not None:
4487 self.cluster.reserved_lvs = self.op.reserved_lvs
4489 if self.op.use_external_mip_script is not None:
4490 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4492 def helper_os(aname, mods, desc):
4494 lst = getattr(self.cluster, aname)
4495 for key, val in mods:
4496 if key == constants.DDM_ADD:
4498 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4501 elif key == constants.DDM_REMOVE:
4505 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4507 raise errors.ProgrammerError("Invalid modification '%s'" % key)
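# Illustrative modification list as handled by helper_os above (hypothetical
# OS names): [(constants.DDM_ADD, "lenny-image"),
# (constants.DDM_REMOVE, "old-image")] adds one OS to the list and removes
# another.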
4509 if self.op.hidden_os:
4510 helper_os("hidden_os", self.op.hidden_os, "hidden")
4512 if self.op.blacklisted_os:
4513 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4515 if self.op.master_netdev:
4516 master_params = self.cfg.GetMasterNetworkParameters()
4517 ems = self.cfg.GetUseExternalMipScript()
4518 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4519 self.cluster.master_netdev)
4520 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4522 result.Raise("Could not disable the master ip")
4523 feedback_fn("Changing master_netdev from %s to %s" %
4524 (master_params.netdev, self.op.master_netdev))
4525 self.cluster.master_netdev = self.op.master_netdev
4527 if self.op.master_netmask:
4528 master_params = self.cfg.GetMasterNetworkParameters()
4529 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4530 result = self.rpc.call_node_change_master_netmask(master_params.name,
4531 master_params.netmask,
4532 self.op.master_netmask,
4534 master_params.netdev)
4536 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4539 self.cluster.master_netmask = self.op.master_netmask
4541 self.cfg.Update(self.cluster, feedback_fn)
4543 if self.op.master_netdev:
4544 master_params = self.cfg.GetMasterNetworkParameters()
4545 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4546 self.op.master_netdev)
4547 ems = self.cfg.GetUseExternalMipScript()
4548 result = self.rpc.call_node_activate_master_ip(master_params.name,
4551 self.LogWarning("Could not re-enable the master ip on"
4552 " the master, please restart manually: %s",
4556 def _UploadHelper(lu, nodes, fname):
4557 """Helper for uploading a file and showing warnings.
4560 if os.path.exists(fname):
4561 result = lu.rpc.call_upload_file(nodes, fname)
4562 for to_node, to_result in result.items():
4563 msg = to_result.fail_msg
4565 msg = ("Copy of file %s to node %s failed: %s" %
4566 (fname, to_node, msg))
4570 def _ComputeAncillaryFiles(cluster, redist):
4571 """Compute files external to Ganeti which need to be consistent.
4573 @type redist: boolean
4574 @param redist: Whether to include files which need to be redistributed
4577 # Compute files for all nodes
4579 pathutils.SSH_KNOWN_HOSTS_FILE,
4580 pathutils.CONFD_HMAC_KEY,
4581 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4582 pathutils.SPICE_CERT_FILE,
4583 pathutils.SPICE_CACERT_FILE,
4584 pathutils.RAPI_USERS_FILE,
4588 # we need to ship at least the RAPI certificate
4589 files_all.add(pathutils.RAPI_CERT_FILE)
4591 files_all.update(pathutils.ALL_CERT_FILES)
4592 files_all.update(ssconf.SimpleStore().GetFileList())
4594 if cluster.modify_etc_hosts:
4595 files_all.add(pathutils.ETC_HOSTS)
4597 if cluster.use_external_mip_script:
4598 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4600 # Files which are optional; these must:
4601 # - be present in one other category as well
4602 # - either exist or not exist on all nodes of that category (mc, vm all)
4604 pathutils.RAPI_USERS_FILE,
4607 # Files which should only be on master candidates
4611 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4615 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4616 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4617 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4619 # Files which should only be on VM-capable nodes
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4628 for hv_name in cluster.enabled_hypervisors
4630 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4632 # Filenames in each category must be unique
4633 all_files_set = files_all | files_mc | files_vm
4634 assert (len(all_files_set) ==
4635 sum(map(len, [files_all, files_mc, files_vm]))), \
4636 "Found file listed in more than one file list"
4638 # Optional files must be present in one other category
4639 assert all_files_set.issuperset(files_opt), \
4640 "Optional file not in a different required list"
4642 # This one file should never ever be re-distributed via RPC
4643 assert not (redist and
4644 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4646 return (files_all, files_opt, files_mc, files_vm)
4649 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4650 """Distribute additional files which are part of the cluster configuration.
4652 ConfigWriter takes care of distributing the config and ssconf files, but
4653 there are more files which should be distributed to all nodes. This function
4654 makes sure those are copied.
4656 @param lu: calling logical unit
4657 @param additional_nodes: list of nodes not in the config to distribute to
4658 @type additional_vm: boolean
4659 @param additional_vm: whether the additional nodes are vm-capable or not
4662 # Gather target nodes
4663 cluster = lu.cfg.GetClusterInfo()
4664 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4666 online_nodes = lu.cfg.GetOnlineNodeList()
4667 online_set = frozenset(online_nodes)
4668 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4670 if additional_nodes is not None:
4671 online_nodes.extend(additional_nodes)
4673 vm_nodes.extend(additional_nodes)
4675 # Never distribute to master node
4676 for nodelist in [online_nodes, vm_nodes]:
4677 if master_info.name in nodelist:
4678 nodelist.remove(master_info.name)
4681 (files_all, _, files_mc, files_vm) = \
4682 _ComputeAncillaryFiles(cluster, True)
4684 # Never re-distribute the configuration file from here
4685 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4686 pathutils.CLUSTER_CONF_FILE in files_vm)
4687 assert not files_mc, "Master candidates not handled in this function"
4690 (online_nodes, files_all),
4691 (vm_nodes, files_vm),
4695 for (node_list, files) in filemap:
4697 _UploadHelper(lu, node_list, fname)
4700 class LUClusterRedistConf(NoHooksLU):
4701 """Force the redistribution of cluster configuration.
4703 This is a very simple LU.
4708 def ExpandNames(self):
4709 self.needed_locks = {
4710 locking.LEVEL_NODE: locking.ALL_SET,
4711 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4713 self.share_locks = _ShareAll()
4715 def Exec(self, feedback_fn):
4716 """Redistribute the configuration.
4719 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4720 _RedistributeAncillaryFiles(self)
4723 class LUClusterActivateMasterIp(NoHooksLU):
4724 """Activate the master IP on the master node.
4727 def Exec(self, feedback_fn):
4728 """Activate the master IP.
4731 master_params = self.cfg.GetMasterNetworkParameters()
4732 ems = self.cfg.GetUseExternalMipScript()
4733 result = self.rpc.call_node_activate_master_ip(master_params.name,
4735 result.Raise("Could not activate the master IP")
4738 class LUClusterDeactivateMasterIp(NoHooksLU):
4739 """Deactivate the master IP on the master node.
4742 def Exec(self, feedback_fn):
4743 """Deactivate the master IP.
4746 master_params = self.cfg.GetMasterNetworkParameters()
4747 ems = self.cfg.GetUseExternalMipScript()
4748 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4750 result.Raise("Could not deactivate the master IP")
4753 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4754 """Sleep and poll for an instance's disk to sync.
4757 if not instance.disks or disks is not None and not disks:
4760 disks = _ExpandCheckDisks(instance, disks)
4763 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4765 node = instance.primary_node
4768 lu.cfg.SetDiskID(dev, node)
4770 # TODO: Convert to utils.Retry
4773 degr_retries = 10 # in seconds, as we sleep 1 second each time
4777 cumul_degraded = False
4778 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4779 msg = rstats.fail_msg
4781 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4784 raise errors.RemoteError("Can't contact node %s for mirror data,"
4785 " aborting." % node)
4788 rstats = rstats.payload
4790 for i, mstat in enumerate(rstats):
4792 lu.LogWarning("Can't compute data for node %s/%s",
4793 node, disks[i].iv_name)
4796 cumul_degraded = (cumul_degraded or
4797 (mstat.is_degraded and mstat.sync_percent is None))
4798 if mstat.sync_percent is not None:
4800 if mstat.estimated_time is not None:
4801 rem_time = ("%s remaining (estimated)" %
4802 utils.FormatSeconds(mstat.estimated_time))
4803 max_time = mstat.estimated_time
4805 rem_time = "no time estimate"
4806 lu.LogInfo("- device %s: %5.2f%% done, %s",
4807 disks[i].iv_name, mstat.sync_percent, rem_time)
4809 # if we're done but degraded, let's do a few small retries, to
4810 # make sure we see a stable and not transient situation; therefore
4811 # we force a restart of the loop
4812 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4813 logging.info("Degraded disks found, %d retries left", degr_retries)
4821 time.sleep(min(60, max_time))
4824 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4826 return not cumul_degraded
4829 def _BlockdevFind(lu, node, dev, instance):
4830 """Wrapper around call_blockdev_find to annotate diskparams.
4832 @param lu: A reference to the lu object
4833 @param node: The node to call out
4834 @param dev: The device to find
4835 @param instance: The instance object the device belongs to
4836 @returns The result of the rpc call
4839 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4840 return lu.rpc.call_blockdev_find(node, disk)
4843 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4844 """Wrapper around L{_CheckDiskConsistencyInner}.
4847 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4848 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4852 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4854 """Check that mirrors are not degraded.
4856 @attention: The device has to be annotated already.
4858 The ldisk parameter, if True, will change the test from the
4859 is_degraded attribute (which represents overall non-ok status for
4860 the device(s)) to the ldisk (representing the local storage status).
4863 lu.cfg.SetDiskID(dev, node)
4867 if on_primary or dev.AssembleOnSecondary():
4868 rstats = lu.rpc.call_blockdev_find(node, dev)
4869 msg = rstats.fail_msg
4871 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4873 elif not rstats.payload:
4874 lu.LogWarning("Can't find disk on node %s", node)
4878 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4880 result = result and not rstats.payload.is_degraded
4883 for child in dev.children:
4884 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4890 class LUOobCommand(NoHooksLU):
4891 """Logical unit for OOB handling.
4895 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4897 def ExpandNames(self):
4898 """Gather locks we need.
4901 if self.op.node_names:
4902 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4903 lock_names = self.op.node_names
4905 lock_names = locking.ALL_SET
4907 self.needed_locks = {
4908 locking.LEVEL_NODE: lock_names,
4911 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4913 if not self.op.node_names:
4914 # Acquire node allocation lock only if all nodes are affected
4915 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4917 def CheckPrereq(self):
4918 """Check prerequisites.
4921 - the node exists in the configuration
4924 Any errors are signaled by raising errors.OpPrereqError.
4928 self.master_node = self.cfg.GetMasterNode()
4930 assert self.op.power_delay >= 0.0
4932 if self.op.node_names:
4933 if (self.op.command in self._SKIP_MASTER and
4934 self.master_node in self.op.node_names):
4935 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4936 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4938 if master_oob_handler:
4939 additional_text = ("run '%s %s %s' if you want to operate on the"
4940 " master regardless") % (master_oob_handler,
4944 additional_text = "it does not support out-of-band operations"
4946 raise errors.OpPrereqError(("Operating on the master node %s is not"
4947 " allowed for %s; %s") %
4948 (self.master_node, self.op.command,
4949 additional_text), errors.ECODE_INVAL)
4951 self.op.node_names = self.cfg.GetNodeList()
4952 if self.op.command in self._SKIP_MASTER:
4953 self.op.node_names.remove(self.master_node)
4955 if self.op.command in self._SKIP_MASTER:
4956 assert self.master_node not in self.op.node_names
4958 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4960 raise errors.OpPrereqError("Node %s not found" % node_name,
4963 self.nodes.append(node)
4965 if (not self.op.ignore_status and
4966 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4967 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4968 " not marked offline") % node_name,
4971 def Exec(self, feedback_fn):
4972 """Execute OOB and return result if we expect any.
4975 master_node = self.master_node
4978 for idx, node in enumerate(utils.NiceSort(self.nodes,
4979 key=lambda node: node.name)):
4980 node_entry = [(constants.RS_NORMAL, node.name)]
4981 ret.append(node_entry)
4983 oob_program = _SupportsOob(self.cfg, node)
4986 node_entry.append((constants.RS_UNAVAIL, None))
4989 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4990 self.op.command, oob_program, node.name)
4991 result = self.rpc.call_run_oob(master_node, oob_program,
4992 self.op.command, node.name,
4996 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4997 node.name, result.fail_msg)
4998 node_entry.append((constants.RS_NODATA, None))
5001 self._CheckPayload(result)
5002 except errors.OpExecError, err:
5003 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5005 node_entry.append((constants.RS_NODATA, None))
5007 if self.op.command == constants.OOB_HEALTH:
5008 # For health we should log important events
5009 for item, status in result.payload:
5010 if status in [constants.OOB_STATUS_WARNING,
5011 constants.OOB_STATUS_CRITICAL]:
5012 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5013 item, node.name, status)
5015 if self.op.command == constants.OOB_POWER_ON:
5017 elif self.op.command == constants.OOB_POWER_OFF:
5018 node.powered = False
5019 elif self.op.command == constants.OOB_POWER_STATUS:
5020 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5021 if powered != node.powered:
5022 logging.warning(("Recorded power state (%s) of node '%s' does not"
5023 " match actual power state (%s)"), node.powered,
5026 # For configuration changing commands we should update the node
5027 if self.op.command in (constants.OOB_POWER_ON,
5028 constants.OOB_POWER_OFF):
5029 self.cfg.Update(node, feedback_fn)
5031 node_entry.append((constants.RS_NORMAL, result.payload))
5033 if (self.op.command == constants.OOB_POWER_ON and
5034 idx < len(self.nodes) - 1):
5035 time.sleep(self.op.power_delay)
5039 def _CheckPayload(self, result):
5040 """Checks if the payload is valid.
5042 @param result: RPC result
5043 @raises errors.OpExecError: If payload is not valid
5047 if self.op.command == constants.OOB_HEALTH:
5048 if not isinstance(result.payload, list):
5049 errs.append("command 'health' is expected to return a list but got %s" %
5050 type(result.payload))
5052 for item, status in result.payload:
5053 if status not in constants.OOB_STATUSES:
5054 errs.append("health item '%s' has invalid status '%s'" %
5057 if self.op.command == constants.OOB_POWER_STATUS:
5058 if not isinstance(result.payload, dict):
5059 errs.append("power-status is expected to return a dict but got %s" %
5060 type(result.payload))
5062 if self.op.command in [
5063 constants.OOB_POWER_ON,
5064 constants.OOB_POWER_OFF,
5065 constants.OOB_POWER_CYCLE,
5067 if result.payload is not None:
5068 errs.append("%s is expected to not return payload but got '%s'" %
5069 (self.op.command, result.payload))
5072 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5073 utils.CommaJoin(errs))
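# Illustrative payloads accepted by _CheckPayload above (hypothetical data):
#   OOB_HEALTH:       a list of (item, status) pairs, e.g.
#                     [("temperature", constants.OOB_STATUS_WARNING)]
#   OOB_POWER_STATUS: a dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: no payload at all (None)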
5076 class _OsQuery(_QueryBase):
5077 FIELDS = query.OS_FIELDS
5079 def ExpandNames(self, lu):
5080 # Lock all nodes in shared mode
5081 # Temporary removal of locks, should be reverted later
5082 # TODO: reintroduce locks when they are lighter-weight
5083 lu.needed_locks = {}
5084 #self.share_locks[locking.LEVEL_NODE] = 1
5085 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5087 # The following variables interact with _QueryBase._GetNames
5089 self.wanted = self.names
5091 self.wanted = locking.ALL_SET
5093 self.do_locking = self.use_locking
5095 def DeclareLocks(self, lu, level):
5099 def _DiagnoseByOS(rlist):
5100 """Remaps a per-node return list into an a per-os per-node dictionary
5102 @param rlist: a map with node names as keys and OS objects as values
5105 @return: a dictionary with osnames as keys and as value another
5106 map, with nodes as keys and tuples of (path, status, diagnose,
5107 variants, parameters, api_versions) as values, eg::
5109 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5110 (/srv/..., False, "invalid api")],
5111 "node2": [(/srv/..., True, "", [], [])]}
5116 # we build here the list of nodes that didn't fail the RPC (at RPC
5117 # level), so that nodes with a non-responding node daemon don't
5118 # make all OSes invalid
5119 good_nodes = [node_name for node_name in rlist
5120 if not rlist[node_name].fail_msg]
5121 for node_name, nr in rlist.items():
5122 if nr.fail_msg or not nr.payload:
5124 for (name, path, status, diagnose, variants,
5125 params, api_versions) in nr.payload:
5126 if name not in all_os:
5127 # build a list of nodes for this os containing empty lists
5128 # for each node in node_list
5130 for nname in good_nodes:
5131 all_os[name][nname] = []
5132 # convert params from [name, help] to (name, help)
5133 params = [tuple(v) for v in params]
5134 all_os[name][node_name].append((path, status, diagnose,
5135 variants, params, api_versions))
5138 def _GetQueryData(self, lu):
5139 """Computes the list of nodes and their attributes.
5142 # Locking is not used
5143 assert not (compat.any(lu.glm.is_owned(level)
5144 for level in locking.LEVELS
5145 if level != locking.LEVEL_CLUSTER) or
5146 self.do_locking or self.use_locking)
5148 valid_nodes = [node.name
5149 for node in lu.cfg.GetAllNodesInfo().values()
5150 if not node.offline and node.vm_capable]
5151 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5152 cluster = lu.cfg.GetClusterInfo()
5156 for (os_name, os_data) in pol.items():
5157 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5158 hidden=(os_name in cluster.hidden_os),
5159 blacklisted=(os_name in cluster.blacklisted_os))
5163 api_versions = set()
5165 for idx, osl in enumerate(os_data.values()):
5166 info.valid = bool(info.valid and osl and osl[0][1])
5170 (node_variants, node_params, node_api) = osl[0][3:6]
5173 variants.update(node_variants)
5174 parameters.update(node_params)
5175 api_versions.update(node_api)
5177 # Filter out inconsistent values
5178 variants.intersection_update(node_variants)
5179 parameters.intersection_update(node_params)
5180 api_versions.intersection_update(node_api)
5182 info.variants = list(variants)
5183 info.parameters = list(parameters)
5184 info.api_versions = list(api_versions)
5186 data[os_name] = info
5188 # Prepare data in requested order
5189 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5193 class LUOsDiagnose(NoHooksLU):
5194 """Logical unit for OS diagnose/query.
5200 def _BuildFilter(fields, names):
5201 """Builds a filter for querying OSes.
5204 name_filter = qlang.MakeSimpleFilter("name", names)
5206 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5207 # respective field is not requested
5208 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5209 for fname in ["hidden", "blacklisted"]
5210 if fname not in fields]
5211 if "valid" not in fields:
5212 status_filter.append([qlang.OP_TRUE, "valid"])
5215 status_filter.insert(0, qlang.OP_AND)
5217 status_filter = None
5219 if name_filter and status_filter:
5220 return [qlang.OP_AND, name_filter, status_filter]
5224 return status_filter
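# Illustrative filter built by _BuildFilter (hypothetical input): for
# fields=["name"] and names=["lenny-image"] the result is roughly
#   [qlang.OP_AND,
#    <simple name filter for "lenny-image">,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]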
5226 def CheckArguments(self):
5227 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5228 self.op.output_fields, False)
5230 def ExpandNames(self):
5231 self.oq.ExpandNames(self)
5233 def Exec(self, feedback_fn):
5234 return self.oq.OldStyleQuery(self)
5237 class _ExtStorageQuery(_QueryBase):
5238 FIELDS = query.EXTSTORAGE_FIELDS
5240 def ExpandNames(self, lu):
5241 # Lock all nodes in shared mode
5242 # Temporary removal of locks, should be reverted later
5243 # TODO: reintroduce locks when they are lighter-weight
5244 lu.needed_locks = {}
5245 #self.share_locks[locking.LEVEL_NODE] = 1
5246 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5248 # The following variables interact with _QueryBase._GetNames
5250 self.wanted = self.names
5252 self.wanted = locking.ALL_SET
5254 self.do_locking = self.use_locking
5256 def DeclareLocks(self, lu, level):
5260 def _DiagnoseByProvider(rlist):
5261 """Remaps a per-node return list into an a per-provider per-node dictionary
5263 @param rlist: a map with node names as keys and ExtStorage objects as values
5266 @return: a dictionary with extstorage providers as keys and as
5267 value another map, with nodes as keys and tuples of
5268 (path, status, diagnose, parameters) as values, eg::
5270 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5271 "node2": [(/srv/..., False, "missing file")]
5272 "node3": [(/srv/..., True, "", [])]
5277 # we build here the list of nodes that didn't fail the RPC (at RPC
5278 # level), so that nodes with a non-responding node daemon don't
5279 # make all providers invalid
5280 good_nodes = [node_name for node_name in rlist
5281 if not rlist[node_name].fail_msg]
5282 for node_name, nr in rlist.items():
5283 if nr.fail_msg or not nr.payload:
5285 for (name, path, status, diagnose, params) in nr.payload:
5286 if name not in all_es:
5287 # build a list of nodes for this provider containing empty lists
5288 # for each node in node_list
5290 for nname in good_nodes:
5291 all_es[name][nname] = []
5292 # convert params from [name, help] to (name, help)
5293 params = [tuple(v) for v in params]
5294 all_es[name][node_name].append((path, status, diagnose, params))
5297 def _GetQueryData(self, lu):
5298 """Computes the list of nodes and their attributes.
5301 # Locking is not used
5302 assert not (compat.any(lu.glm.is_owned(level)
5303 for level in locking.LEVELS
5304 if level != locking.LEVEL_CLUSTER) or
5305 self.do_locking or self.use_locking)
5307 valid_nodes = [node.name
5308 for node in lu.cfg.GetAllNodesInfo().values()
5309 if not node.offline and node.vm_capable]
5310 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5314 nodegroup_list = lu.cfg.GetNodeGroupList()
5316 for (es_name, es_data) in pol.items():
5317 # For every provider compute the nodegroup validity.
5318 # To do this we need to check the validity of each node in es_data
5319 # and then construct the corresponding nodegroup dict:
5320 # { nodegroup1: status
5321 # nodegroup2: status
5324 for nodegroup in nodegroup_list:
5325 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5327 nodegroup_nodes = ndgrp.members
5328 nodegroup_name = ndgrp.name
5331 for node in nodegroup_nodes:
5332 if node in valid_nodes:
5333 if es_data[node] != []:
5334 node_status = es_data[node][0][1]
5335 node_statuses.append(node_status)
5337 node_statuses.append(False)
5339 if False in node_statuses:
5340 ndgrp_data[nodegroup_name] = False
5342 ndgrp_data[nodegroup_name] = True
5344 # Compute the provider's parameters
5346 for idx, esl in enumerate(es_data.values()):
5347 valid = bool(esl and esl[0][1])
5351 node_params = esl[0][3]
5354 parameters.update(node_params)
5356 # Filter out inconsistent values
5357 parameters.intersection_update(node_params)
5359 params = list(parameters)
5361 # Now fill all the info for this provider
5362 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5363 nodegroup_status=ndgrp_data,
5366 data[es_name] = info
5368 # Prepare data in requested order
5369 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5373 class LUExtStorageDiagnose(NoHooksLU):
5374 """Logical unit for ExtStorage diagnose/query.
5379 def CheckArguments(self):
5380 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5381 self.op.output_fields, False)
5383 def ExpandNames(self):
5384 self.eq.ExpandNames(self)
5386 def Exec(self, feedback_fn):
5387 return self.eq.OldStyleQuery(self)
5390 class LUNodeRemove(LogicalUnit):
5391 """Logical unit for removing a node.
5394 HPATH = "node-remove"
5395 HTYPE = constants.HTYPE_NODE
5397 def BuildHooksEnv(self):
5402 "OP_TARGET": self.op.node_name,
5403 "NODE_NAME": self.op.node_name,
5406 def BuildHooksNodes(self):
5407 """Build hooks nodes.
5409 This doesn't run on the target node in the pre phase as a failed
5410 node would then be impossible to remove.
5413 all_nodes = self.cfg.GetNodeList()
5415 all_nodes.remove(self.op.node_name)
5418 return (all_nodes, all_nodes)
5420 def CheckPrereq(self):
5421 """Check prerequisites.
5424 - the node exists in the configuration
5425 - it does not have primary or secondary instances
5426 - it's not the master
5428 Any errors are signaled by raising errors.OpPrereqError.
5431 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5432 node = self.cfg.GetNodeInfo(self.op.node_name)
5433 assert node is not None
5435 masternode = self.cfg.GetMasterNode()
5436 if node.name == masternode:
5437 raise errors.OpPrereqError("Node is the master node, failover to another"
5438 " node is required", errors.ECODE_INVAL)
5440 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5441 if node.name in instance.all_nodes:
5442 raise errors.OpPrereqError("Instance %s is still running on the node,"
5443 " please remove first" % instance_name,
5445 self.op.node_name = node.name
5448 def Exec(self, feedback_fn):
5449 """Removes the node from the cluster.
5453 logging.info("Stopping the node daemon and removing configs from node %s",
5456 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5458 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5461 # Promote nodes to master candidate as needed
5462 _AdjustCandidatePool(self, exceptions=[node.name])
5463 self.context.RemoveNode(node.name)
5465 # Run post hooks on the node before it's removed
5466 _RunPostHook(self, node.name)
5468 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5469 msg = result.fail_msg
5471 self.LogWarning("Errors encountered on the remote node while leaving"
5472 " the cluster: %s", msg)
5474 # Remove node from our /etc/hosts
5475 if self.cfg.GetClusterInfo().modify_etc_hosts:
5476 master_node = self.cfg.GetMasterNode()
5477 result = self.rpc.call_etc_hosts_modify(master_node,
5478 constants.ETC_HOSTS_REMOVE,
5480 result.Raise("Can't update hosts file with new host data")
5481 _RedistributeAncillaryFiles(self)
5484 class _NodeQuery(_QueryBase):
5485 FIELDS = query.NODE_FIELDS
5487 def ExpandNames(self, lu):
5488 lu.needed_locks = {}
5489 lu.share_locks = _ShareAll()
5492 self.wanted = _GetWantedNodes(lu, self.names)
5494 self.wanted = locking.ALL_SET
5496 self.do_locking = (self.use_locking and
5497 query.NQ_LIVE in self.requested_data)
5500 # If any non-static field is requested we need to lock the nodes
5501 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5502 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5504 def DeclareLocks(self, lu, level):
5507 def _GetQueryData(self, lu):
5508 """Computes the list of nodes and their attributes.
5511 all_info = lu.cfg.GetAllNodesInfo()
5513 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5515 # Gather data as requested
5516 if query.NQ_LIVE in self.requested_data:
5517 # filter out non-vm_capable nodes
5518 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5520 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5521 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5522 [lu.cfg.GetHypervisorType()], es_flags)
5523 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5524 for (name, nresult) in node_data.items()
5525 if not nresult.fail_msg and nresult.payload)
5529 if query.NQ_INST in self.requested_data:
5530 node_to_primary = dict([(name, set()) for name in nodenames])
5531 node_to_secondary = dict([(name, set()) for name in nodenames])
5533 inst_data = lu.cfg.GetAllInstancesInfo()
5535 for inst in inst_data.values():
5536 if inst.primary_node in node_to_primary:
5537 node_to_primary[inst.primary_node].add(inst.name)
5538 for secnode in inst.secondary_nodes:
5539 if secnode in node_to_secondary:
5540 node_to_secondary[secnode].add(inst.name)
5542 node_to_primary = None
5543 node_to_secondary = None
5545 if query.NQ_OOB in self.requested_data:
5546 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5547 for name, node in all_info.iteritems())
5551 if query.NQ_GROUP in self.requested_data:
5552 groups = lu.cfg.GetAllNodeGroupsInfo()
5556 return query.NodeQueryData([all_info[name] for name in nodenames],
5557 live_data, lu.cfg.GetMasterNode(),
5558 node_to_primary, node_to_secondary, groups,
5559 oob_support, lu.cfg.GetClusterInfo())
5562 class LUNodeQuery(NoHooksLU):
5563 """Logical unit for querying nodes.
5566 # pylint: disable=W0142
5569 def CheckArguments(self):
5570 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5571 self.op.output_fields, self.op.use_locking)
5573 def ExpandNames(self):
5574 self.nq.ExpandNames(self)
5576 def DeclareLocks(self, level):
5577 self.nq.DeclareLocks(self, level)
5579 def Exec(self, feedback_fn):
5580 return self.nq.OldStyleQuery(self)
5583 class LUNodeQueryvols(NoHooksLU):
5584 """Logical unit for getting volumes on node(s).
5588 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5589 _FIELDS_STATIC = utils.FieldSet("node")
5591 def CheckArguments(self):
5592 _CheckOutputFields(static=self._FIELDS_STATIC,
5593 dynamic=self._FIELDS_DYNAMIC,
5594 selected=self.op.output_fields)
5596 def ExpandNames(self):
5597 self.share_locks = _ShareAll()
5600 self.needed_locks = {
5601 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5604 self.needed_locks = {
5605 locking.LEVEL_NODE: locking.ALL_SET,
5606 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5609 def Exec(self, feedback_fn):
5610 """Computes the list of nodes and their attributes.
5613 nodenames = self.owned_locks(locking.LEVEL_NODE)
5614 volumes = self.rpc.call_node_volumes(nodenames)
5616 ilist = self.cfg.GetAllInstancesInfo()
5617 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5620 for node in nodenames:
5621 nresult = volumes[node]
5624 msg = nresult.fail_msg
5626 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5629 node_vols = sorted(nresult.payload,
5630 key=operator.itemgetter("dev"))
5632 for vol in node_vols:
5634 for field in self.op.output_fields:
5637 elif field == "phys":
5641 elif field == "name":
5643 elif field == "size":
5644 val = int(float(vol["size"]))
5645 elif field == "instance":
5646 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5648 raise errors.ParameterError(field)
5649 node_output.append(str(val))
5651 output.append(node_output)
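# Illustrative sketch (hypothetical names, not part of the original module):
# the vol2inst map used above is keyed by (node_name, "<vg>/<lv_name>"), so a
# lookup resembles
#   vol2inst.get(("node1.example.com", "xenvg/disk0"), "-")
# and falls back to "-" when the logical volume is not owned by any instance.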
5656 class LUNodeQueryStorage(NoHooksLU):
5657 """Logical unit for getting information on storage units on node(s).
5660 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5663 def CheckArguments(self):
5664 _CheckOutputFields(static=self._FIELDS_STATIC,
5665 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5666 selected=self.op.output_fields)
5668 def ExpandNames(self):
5669 self.share_locks = _ShareAll()
5672 self.needed_locks = {
5673 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5676 self.needed_locks = {
5677 locking.LEVEL_NODE: locking.ALL_SET,
5678 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5681 def Exec(self, feedback_fn):
5682 """Computes the list of nodes and their attributes.
5685 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5687 # Always get name to sort by
5688 if constants.SF_NAME in self.op.output_fields:
5689 fields = self.op.output_fields[:]
5691 fields = [constants.SF_NAME] + self.op.output_fields
5693 # Never ask for node or type as it's only known to the LU
5694 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5695 while extra in fields:
5696 fields.remove(extra)
5698 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5699 name_idx = field_idx[constants.SF_NAME]
5701 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5702 data = self.rpc.call_storage_list(self.nodes,
5703 self.op.storage_type, st_args,
5704 self.op.name, fields)
5708 for node in utils.NiceSort(self.nodes):
5709 nresult = data[node]
5713 msg = nresult.fail_msg
5715 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5718 rows = dict([(row[name_idx], row) for row in nresult.payload])
5720 for name in utils.NiceSort(rows.keys()):
5725 for field in self.op.output_fields:
5726 if field == constants.SF_NODE:
5728 elif field == constants.SF_TYPE:
5729 val = self.op.storage_type
5730 elif field in field_idx:
5731 val = row[field_idx[field]]
5733 raise errors.ParameterError(field)
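# Illustrative sketch (hypothetical values, not part of the original module):
# for output_fields = ["node", "name", "size"] the storage RPC above is only
# asked for ["name", "size"]; the "node" column is filled from the node being
# iterated over, "type" would come from the opcode, and field_idx (e.g.
# {"name": 0, "size": 1}) maps the remaining fields back into each row.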
5742 class _InstanceQuery(_QueryBase):
5743 FIELDS = query.INSTANCE_FIELDS
5745 def ExpandNames(self, lu):
5746 lu.needed_locks = {}
5747 lu.share_locks = _ShareAll()
5750 self.wanted = _GetWantedInstances(lu, self.names)
5752 self.wanted = locking.ALL_SET
5754 self.do_locking = (self.use_locking and
5755 query.IQ_LIVE in self.requested_data)
5757 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5758 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5759 lu.needed_locks[locking.LEVEL_NODE] = []
5760 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5762 self.do_grouplocks = (self.do_locking and
5763 query.IQ_NODES in self.requested_data)
5765 def DeclareLocks(self, lu, level):
5767 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5768 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5770      # Lock all groups used by the instances optimistically; this requires going
5771      # through the instances' nodes before those are locked, so the assignment must be verified later on
5772 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5774 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5775 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5776 elif level == locking.LEVEL_NODE:
5777 lu._LockInstancesNodes() # pylint: disable=W0212
5780 def _CheckGroupLocks(lu):
5781 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5782 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5784 # Check if node groups for locked instances are still correct
5785 for instance_name in owned_instances:
5786 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
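# Illustrative note (not part of the original module): the optimistic group
# locking above works in two steps, roughly:
#   1. DeclareLocks reads each locked instance's node groups from the
#      configuration before the group locks are held and requests exactly
#      those locks;
#   2. _CheckGroupLocks re-reads the groups once the locks are owned and lets
#      _CheckInstanceNodeGroups raise if an instance changed groups meanwhile.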
5788 def _GetQueryData(self, lu):
5789 """Computes the list of instances and their attributes.
5792 if self.do_grouplocks:
5793 self._CheckGroupLocks(lu)
5795 cluster = lu.cfg.GetClusterInfo()
5796 all_info = lu.cfg.GetAllInstancesInfo()
5798 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5800 instance_list = [all_info[name] for name in instance_names]
5801 nodes = frozenset(itertools.chain(*(inst.all_nodes
5802 for inst in instance_list)))
5803 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5806 wrongnode_inst = set()
5808 # Gather data as requested
5809 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5811 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5813 result = node_data[name]
5815 # offline nodes will be in both lists
5816 assert result.fail_msg
5817 offline_nodes.append(name)
5819 bad_nodes.append(name)
5820 elif result.payload:
5821 for inst in result.payload:
5822 if inst in all_info:
5823 if all_info[inst].primary_node == name:
5824 live_data.update(result.payload)
5826 wrongnode_inst.add(inst)
5828 # orphan instance; we don't list it here as we don't
5829 # handle this case yet in the output of instance listing
5830            logging.warning("Orphan instance '%s' found on node %s", inst, name)
5832 # else no instance is alive
5836 if query.IQ_DISKUSAGE in self.requested_data:
5837 gmi = ganeti.masterd.instance
5838 disk_usage = dict((inst.name,
5839 gmi.ComputeDiskSize(inst.disk_template,
5840 [{constants.IDISK_SIZE: disk.size}
5841 for disk in inst.disks]))
5842 for inst in instance_list)
5846 if query.IQ_CONSOLE in self.requested_data:
5848 for inst in instance_list:
5849 if inst.name in live_data:
5850 # Instance is running
5851 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5853 consinfo[inst.name] = None
5854 assert set(consinfo.keys()) == set(instance_names)
5858 if query.IQ_NODES in self.requested_data:
5859 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5861 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5862 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5863 for uuid in set(map(operator.attrgetter("group"),
5869 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5870 disk_usage, offline_nodes, bad_nodes,
5871 live_data, wrongnode_inst, consinfo,
5875 class LUQuery(NoHooksLU):
5876 """Query for resources/items of a certain kind.
5879 # pylint: disable=W0142
5882 def CheckArguments(self):
5883 qcls = _GetQueryImplementation(self.op.what)
5885 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5887 def ExpandNames(self):
5888 self.impl.ExpandNames(self)
5890 def DeclareLocks(self, level):
5891 self.impl.DeclareLocks(self, level)
5893 def Exec(self, feedback_fn):
5894 return self.impl.NewStyleQuery(self)
5897 class LUQueryFields(NoHooksLU):
5898 """Query for resources/items of a certain kind.
5901 # pylint: disable=W0142
5904 def CheckArguments(self):
5905 self.qcls = _GetQueryImplementation(self.op.what)
5907 def ExpandNames(self):
5908 self.needed_locks = {}
5910 def Exec(self, feedback_fn):
5911 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5914 class LUNodeModifyStorage(NoHooksLU):
5915 """Logical unit for modifying a storage volume on a node.
5920 def CheckArguments(self):
5921 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5923 storage_type = self.op.storage_type
5926 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5928 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5929 " modified" % storage_type,
5932 diff = set(self.op.changes.keys()) - modifiable
5934 raise errors.OpPrereqError("The following fields can not be modified for"
5935 " storage units of type '%s': %r" %
5936 (storage_type, list(diff)),
5939 def ExpandNames(self):
5940 self.needed_locks = {
5941 locking.LEVEL_NODE: self.op.node_name,
5944 def Exec(self, feedback_fn):
5945 """Computes the list of nodes and their attributes.
5948 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5949 result = self.rpc.call_storage_modify(self.op.node_name,
5950 self.op.storage_type, st_args,
5951 self.op.name, self.op.changes)
5952 result.Raise("Failed to modify storage unit '%s' on %s" %
5953 (self.op.name, self.op.node_name))
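# Illustrative sketch (assumed example values, not part of the original
# module): constants.MODIFIABLE_STORAGE_FIELDS maps each storage type to the
# fields that may be changed, so an opcode with, say,
#   storage_type = constants.ST_LVM_PV
#   changes = {constants.SF_ALLOCATABLE: False}
# passes CheckArguments above, while any field outside the modifiable set is
# rejected with an OpPrereqError.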
5956 class LUNodeAdd(LogicalUnit):
5957 """Logical unit for adding node to the cluster.
5961 HTYPE = constants.HTYPE_NODE
5962 _NFLAGS = ["master_capable", "vm_capable"]
5964 def CheckArguments(self):
5965 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5966 # validate/normalize the node name
5967 self.hostname = netutils.GetHostname(name=self.op.node_name,
5968 family=self.primary_ip_family)
5969 self.op.node_name = self.hostname.name
5971 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5972 raise errors.OpPrereqError("Cannot readd the master node",
5975 if self.op.readd and self.op.group:
5976 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5977 " being readded", errors.ECODE_INVAL)
5979 def BuildHooksEnv(self):
5982 This will run on all nodes before, and on all nodes + the new node after.
5986 "OP_TARGET": self.op.node_name,
5987 "NODE_NAME": self.op.node_name,
5988 "NODE_PIP": self.op.primary_ip,
5989 "NODE_SIP": self.op.secondary_ip,
5990 "MASTER_CAPABLE": str(self.op.master_capable),
5991 "VM_CAPABLE": str(self.op.vm_capable),
5994 def BuildHooksNodes(self):
5995 """Build hooks nodes.
5998 # Exclude added node
5999 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6000 post_nodes = pre_nodes + [self.op.node_name, ]
6002 return (pre_nodes, post_nodes)
6004 def CheckPrereq(self):
6005 """Check prerequisites.
6008 - the new node is not already in the config
6010     - its parameters (single/dual homed) match the cluster
6012 Any errors are signaled by raising errors.OpPrereqError.
6016 hostname = self.hostname
6017 node = hostname.name
6018 primary_ip = self.op.primary_ip = hostname.ip
6019 if self.op.secondary_ip is None:
6020 if self.primary_ip_family == netutils.IP6Address.family:
6021 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6022 " IPv4 address must be given as secondary",
6024 self.op.secondary_ip = primary_ip
6026 secondary_ip = self.op.secondary_ip
6027 if not netutils.IP4Address.IsValid(secondary_ip):
6028 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6029 " address" % secondary_ip, errors.ECODE_INVAL)
6031 node_list = cfg.GetNodeList()
6032 if not self.op.readd and node in node_list:
6033 raise errors.OpPrereqError("Node %s is already in the configuration" %
6034 node, errors.ECODE_EXISTS)
6035 elif self.op.readd and node not in node_list:
6036 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6039 self.changed_primary_ip = False
6041 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6042 if self.op.readd and node == existing_node_name:
6043 if existing_node.secondary_ip != secondary_ip:
6044 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6045 " address configuration as before",
6047 if existing_node.primary_ip != primary_ip:
6048 self.changed_primary_ip = True
6052 if (existing_node.primary_ip == primary_ip or
6053 existing_node.secondary_ip == primary_ip or
6054 existing_node.primary_ip == secondary_ip or
6055 existing_node.secondary_ip == secondary_ip):
6056 raise errors.OpPrereqError("New node ip address(es) conflict with"
6057 " existing node %s" % existing_node.name,
6058 errors.ECODE_NOTUNIQUE)
6060 # After this 'if' block, None is no longer a valid value for the
6061 # _capable op attributes
6063 old_node = self.cfg.GetNodeInfo(node)
6064 assert old_node is not None, "Can't retrieve locked node %s" % node
6065 for attr in self._NFLAGS:
6066 if getattr(self.op, attr) is None:
6067 setattr(self.op, attr, getattr(old_node, attr))
6069 for attr in self._NFLAGS:
6070 if getattr(self.op, attr) is None:
6071 setattr(self.op, attr, True)
6073 if self.op.readd and not self.op.vm_capable:
6074 pri, sec = cfg.GetNodeInstances(node)
6076 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6077 " flag set to false, but it already holds"
6078 " instances" % node,
6081 # check that the type of the node (single versus dual homed) is the
6082 # same as for the master
6083 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6084 master_singlehomed = myself.secondary_ip == myself.primary_ip
6085 newbie_singlehomed = secondary_ip == primary_ip
6086 if master_singlehomed != newbie_singlehomed:
6087 if master_singlehomed:
6088 raise errors.OpPrereqError("The master has no secondary ip but the"
6089 " new node has one",
6092 raise errors.OpPrereqError("The master has a secondary ip but the"
6093 " new node doesn't have one",
6096 # checks reachability
6097 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6098 raise errors.OpPrereqError("Node not reachable by ping",
6099 errors.ECODE_ENVIRON)
6101 if not newbie_singlehomed:
6102 # check reachability from my secondary ip to newbie's secondary ip
6103 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6104 source=myself.secondary_ip):
6105 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6106 " based ping to node daemon port",
6107 errors.ECODE_ENVIRON)
6114 if self.op.master_capable:
6115 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6117 self.master_candidate = False
6120 self.new_node = old_node
6122 node_group = cfg.LookupNodeGroup(self.op.group)
6123 self.new_node = objects.Node(name=node,
6124 primary_ip=primary_ip,
6125 secondary_ip=secondary_ip,
6126 master_candidate=self.master_candidate,
6127 offline=False, drained=False,
6128 group=node_group, ndparams={})
6130 if self.op.ndparams:
6131 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6133 if self.op.hv_state:
6134 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6136 if self.op.disk_state:
6137 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6139 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6140 # it a property on the base class.
6141 rpcrunner = rpc.DnsOnlyRunner()
6142 result = rpcrunner.call_version([node])[node]
6143 result.Raise("Can't get version information from node %s" % node)
6144 if constants.PROTOCOL_VERSION == result.payload:
6145 logging.info("Communication to node %s fine, sw version %s match",
6146 node, result.payload)
6148 raise errors.OpPrereqError("Version mismatch master version %s,"
6149 " node version %s" %
6150 (constants.PROTOCOL_VERSION, result.payload),
6151 errors.ECODE_ENVIRON)
6153 vg_name = cfg.GetVGName()
6154 if vg_name is not None:
6155 vparams = {constants.NV_PVLIST: [vg_name]}
6156 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6157 if self.op.ndparams:
6158 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6160 cname = self.cfg.GetClusterName()
6161 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6162 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6164 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6165 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6167 def Exec(self, feedback_fn):
6168 """Adds the new node to the cluster.
6171 new_node = self.new_node
6172 node = new_node.name
6174 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6177     # We are adding a new node, so we assume it's powered
6178 new_node.powered = True
6180 # for re-adds, reset the offline/drained/master-candidate flags;
6181 # we need to reset here, otherwise offline would prevent RPC calls
6182 # later in the procedure; this also means that if the re-add
6183 # fails, we are left with a non-offlined, broken node
6185 new_node.drained = new_node.offline = False # pylint: disable=W0201
6186 self.LogInfo("Readding a node, the offline/drained flags were reset")
6187 # if we demote the node, we do cleanup later in the procedure
6188 new_node.master_candidate = self.master_candidate
6189 if self.changed_primary_ip:
6190 new_node.primary_ip = self.op.primary_ip
6192 # copy the master/vm_capable flags
6193 for attr in self._NFLAGS:
6194 setattr(new_node, attr, getattr(self.op, attr))
6196 # notify the user about any possible mc promotion
6197 if new_node.master_candidate:
6198 self.LogInfo("Node will be a master candidate")
6200 if self.op.ndparams:
6201 new_node.ndparams = self.op.ndparams
6203 new_node.ndparams = {}
6205 if self.op.hv_state:
6206 new_node.hv_state_static = self.new_hv_state
6208 if self.op.disk_state:
6209 new_node.disk_state_static = self.new_disk_state
6211 # Add node to our /etc/hosts, and add key to known_hosts
6212 if self.cfg.GetClusterInfo().modify_etc_hosts:
6213 master_node = self.cfg.GetMasterNode()
6214 result = self.rpc.call_etc_hosts_modify(master_node,
6215 constants.ETC_HOSTS_ADD,
6218 result.Raise("Can't update hosts file with new host data")
6220 if new_node.secondary_ip != new_node.primary_ip:
6221 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6224 node_verify_list = [self.cfg.GetMasterNode()]
6225 node_verify_param = {
6226 constants.NV_NODELIST: ([node], {}),
6227 # TODO: do a node-net-test as well?
6230 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6231 self.cfg.GetClusterName())
6232 for verifier in node_verify_list:
6233 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6234 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6236 for failed in nl_payload:
6237 feedback_fn("ssh/hostname verification failed"
6238 " (checking from %s): %s" %
6239 (verifier, nl_payload[failed]))
6240 raise errors.OpExecError("ssh/hostname verification failed")
6243 _RedistributeAncillaryFiles(self)
6244 self.context.ReaddNode(new_node)
6245 # make sure we redistribute the config
6246 self.cfg.Update(new_node, feedback_fn)
6247 # and make sure the new node will not have old files around
6248 if not new_node.master_candidate:
6249 result = self.rpc.call_node_demote_from_mc(new_node.name)
6250 msg = result.fail_msg
6252 self.LogWarning("Node failed to demote itself from master"
6253 " candidate status: %s" % msg)
6255 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6256 additional_vm=self.op.vm_capable)
6257 self.context.AddNode(new_node, self.proc.GetECId())
6260 class LUNodeSetParams(LogicalUnit):
6261 """Modifies the parameters of a node.
6263 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6264 to the node role (as _ROLE_*)
6265 @cvar _R2F: a dictionary from node role to tuples of flags
6266 @cvar _FLAGS: a list of attribute names corresponding to the flags
6269 HPATH = "node-modify"
6270 HTYPE = constants.HTYPE_NODE
6272 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6274 (True, False, False): _ROLE_CANDIDATE,
6275 (False, True, False): _ROLE_DRAINED,
6276 (False, False, True): _ROLE_OFFLINE,
6277 (False, False, False): _ROLE_REGULAR,
6279 _R2F = dict((v, k) for k, v in _F2R.items())
6280 _FLAGS = ["master_candidate", "drained", "offline"]
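# Illustrative note (not part of the original module): the tables above mean,
# for example, that a node with (master_candidate, drained, offline) ==
# (True, False, False) has role _ROLE_CANDIDATE, and that
# _R2F[_ROLE_OFFLINE] == (False, False, True), i.e. only the "offline" flag
# is set for an offline node.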
6282 def CheckArguments(self):
6283 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6284 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6285 self.op.master_capable, self.op.vm_capable,
6286 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6288 if all_mods.count(None) == len(all_mods):
6289 raise errors.OpPrereqError("Please pass at least one modification",
6291 if all_mods.count(True) > 1:
6292 raise errors.OpPrereqError("Can't set the node into more than one"
6293 " state at the same time",
6296 # Boolean value that tells us whether we might be demoting from MC
6297 self.might_demote = (self.op.master_candidate is False or
6298 self.op.offline is True or
6299 self.op.drained is True or
6300 self.op.master_capable is False)
6302 if self.op.secondary_ip:
6303 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6304 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6305 " address" % self.op.secondary_ip,
6308 self.lock_all = self.op.auto_promote and self.might_demote
6309 self.lock_instances = self.op.secondary_ip is not None
6311 def _InstanceFilter(self, instance):
6312 """Filter for getting affected instances.
6315 return (instance.disk_template in constants.DTS_INT_MIRROR and
6316 self.op.node_name in instance.all_nodes)
6318 def ExpandNames(self):
6320 self.needed_locks = {
6321 locking.LEVEL_NODE: locking.ALL_SET,
6323 # Block allocations when all nodes are locked
6324 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: self.op.node_name,
6331 # Since modifying a node can have severe effects on currently running
6332     # operations, the resource lock is at least acquired in shared mode
6333 self.needed_locks[locking.LEVEL_NODE_RES] = \
6334 self.needed_locks[locking.LEVEL_NODE]
6336 # Get all locks except nodes in shared mode; they are not used for anything
6337 # but read-only access
6338 self.share_locks = _ShareAll()
6339 self.share_locks[locking.LEVEL_NODE] = 0
6340 self.share_locks[locking.LEVEL_NODE_RES] = 0
6341 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6343 if self.lock_instances:
6344 self.needed_locks[locking.LEVEL_INSTANCE] = \
6345 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6347 def BuildHooksEnv(self):
6350 This runs on the master node.
6354 "OP_TARGET": self.op.node_name,
6355 "MASTER_CANDIDATE": str(self.op.master_candidate),
6356 "OFFLINE": str(self.op.offline),
6357 "DRAINED": str(self.op.drained),
6358 "MASTER_CAPABLE": str(self.op.master_capable),
6359 "VM_CAPABLE": str(self.op.vm_capable),
6362 def BuildHooksNodes(self):
6363 """Build hooks nodes.
6366 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6369 def CheckPrereq(self):
6370 """Check prerequisites.
6372 This only checks the instance list against the existing names.
6375 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6377 if self.lock_instances:
6378 affected_instances = \
6379 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6381 # Verify instance locks
6382 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6383 wanted_instances = frozenset(affected_instances.keys())
6384 if wanted_instances - owned_instances:
6385 raise errors.OpPrereqError("Instances affected by changing node %s's"
6386 " secondary IP address have changed since"
6387 " locks were acquired, wanted '%s', have"
6388 " '%s'; retry the operation" %
6390 utils.CommaJoin(wanted_instances),
6391 utils.CommaJoin(owned_instances)),
6394 affected_instances = None
6396 if (self.op.master_candidate is not None or
6397 self.op.drained is not None or
6398 self.op.offline is not None):
6399 # we can't change the master's node flags
6400 if self.op.node_name == self.cfg.GetMasterNode():
6401 raise errors.OpPrereqError("The master role can be changed"
6402 " only via master-failover",
6405 if self.op.master_candidate and not node.master_capable:
6406 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6407 " it a master candidate" % node.name,
6410 if self.op.vm_capable is False:
6411 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6413 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6414 " the vm_capable flag" % node.name,
6417 if node.master_candidate and self.might_demote and not self.lock_all:
6418 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6419 # check if after removing the current node, we're missing master
6421 (mc_remaining, mc_should, _) = \
6422 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6423 if mc_remaining < mc_should:
6424 raise errors.OpPrereqError("Not enough master candidates, please"
6425 " pass auto promote option to allow"
6426 " promotion (--auto-promote or RAPI"
6427 " auto_promote=True)", errors.ECODE_STATE)
6429 self.old_flags = old_flags = (node.master_candidate,
6430 node.drained, node.offline)
6431 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6432 self.old_role = old_role = self._F2R[old_flags]
6434 # Check for ineffective changes
6435 for attr in self._FLAGS:
6436 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6437 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6438 setattr(self.op, attr, None)
6440 # Past this point, any flag change to False means a transition
6441 # away from the respective state, as only real changes are kept
6443 # TODO: We might query the real power state if it supports OOB
6444 if _SupportsOob(self.cfg, node):
6445 if self.op.offline is False and not (node.powered or
6446 self.op.powered is True):
6447 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6448 " offline status can be reset") %
6449 self.op.node_name, errors.ECODE_STATE)
6450 elif self.op.powered is not None:
6451 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6452 " as it does not support out-of-band"
6453 " handling") % self.op.node_name,
6456     # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
6457 if (self.op.drained is False or self.op.offline is False or
6458 (self.op.master_capable and not node.master_capable)):
6459 if _DecideSelfPromotion(self):
6460 self.op.master_candidate = True
6461 self.LogInfo("Auto-promoting node to master candidate")
6463 # If we're no longer master capable, we'll demote ourselves from MC
6464 if self.op.master_capable is False and node.master_candidate:
6465 self.LogInfo("Demoting from master candidate")
6466 self.op.master_candidate = False
6469 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6470 if self.op.master_candidate:
6471 new_role = self._ROLE_CANDIDATE
6472 elif self.op.drained:
6473 new_role = self._ROLE_DRAINED
6474 elif self.op.offline:
6475 new_role = self._ROLE_OFFLINE
6476 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6477       # False is still among the new flags, which means we're un-setting one of them
6479 new_role = self._ROLE_REGULAR
6480 else: # no new flags, nothing, keep old role
6483 self.new_role = new_role
6485 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6486 # Trying to transition out of offline status
6487 result = self.rpc.call_version([node.name])[node.name]
6489 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6490 " to report its version: %s" %
6491 (node.name, result.fail_msg),
6494 self.LogWarning("Transitioning node from offline to online state"
6495 " without using re-add. Please make sure the node"
6498 # When changing the secondary ip, verify if this is a single-homed to
6499 # multi-homed transition or vice versa, and apply the relevant
6501 if self.op.secondary_ip:
6502 # Ok even without locking, because this can't be changed by any LU
6503 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6504 master_singlehomed = master.secondary_ip == master.primary_ip
6505 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6506 if self.op.force and node.name == master.name:
6507 self.LogWarning("Transitioning from single-homed to multi-homed"
6508 " cluster; all nodes will require a secondary IP"
6511 raise errors.OpPrereqError("Changing the secondary ip on a"
6512 " single-homed cluster requires the"
6513 " --force option to be passed, and the"
6514 " target node to be the master",
6516 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6517 if self.op.force and node.name == master.name:
6518 self.LogWarning("Transitioning from multi-homed to single-homed"
6519 " cluster; secondary IP addresses will have to be"
6522 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6523 " same as the primary IP on a multi-homed"
6524 " cluster, unless the --force option is"
6525 " passed, and the target node is the"
6526 " master", errors.ECODE_INVAL)
6528 assert not (frozenset(affected_instances) -
6529 self.owned_locks(locking.LEVEL_INSTANCE))
6532 if affected_instances:
6533 msg = ("Cannot change secondary IP address: offline node has"
6534 " instances (%s) configured to use it" %
6535 utils.CommaJoin(affected_instances.keys()))
6536 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6538 # On online nodes, check that no instances are running, and that
6539 # the node has the new ip and we can reach it.
6540 for instance in affected_instances.values():
6541 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6542 msg="cannot change secondary ip")
6544 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6545 if master.name != node.name:
6546 # check reachability from master secondary ip to new secondary ip
6547 if not netutils.TcpPing(self.op.secondary_ip,
6548 constants.DEFAULT_NODED_PORT,
6549 source=master.secondary_ip):
6550 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6551 " based ping to node daemon port",
6552 errors.ECODE_ENVIRON)
6554 if self.op.ndparams:
6555 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6556 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6557 self.new_ndparams = new_ndparams
6559 if self.op.hv_state:
6560 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6561 self.node.hv_state_static)
6563 if self.op.disk_state:
6564 self.new_disk_state = \
6565 _MergeAndVerifyDiskState(self.op.disk_state,
6566 self.node.disk_state_static)
6568 def Exec(self, feedback_fn):
6573 old_role = self.old_role
6574 new_role = self.new_role
6578 if self.op.ndparams:
6579 node.ndparams = self.new_ndparams
6581 if self.op.powered is not None:
6582 node.powered = self.op.powered
6584 if self.op.hv_state:
6585 node.hv_state_static = self.new_hv_state
6587 if self.op.disk_state:
6588 node.disk_state_static = self.new_disk_state
6590 for attr in ["master_capable", "vm_capable"]:
6591 val = getattr(self.op, attr)
6593 setattr(node, attr, val)
6594 result.append((attr, str(val)))
6596 if new_role != old_role:
6597 # Tell the node to demote itself, if no longer MC and not offline
6598 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6599 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6601 self.LogWarning("Node failed to demote itself: %s", msg)
6603 new_flags = self._R2F[new_role]
6604 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6606 result.append((desc, str(nf)))
6607 (node.master_candidate, node.drained, node.offline) = new_flags
6609       # we locked all nodes, so we adjust the candidate pool before updating this node
6611 _AdjustCandidatePool(self, [node.name])
6613 if self.op.secondary_ip:
6614 node.secondary_ip = self.op.secondary_ip
6615 result.append(("secondary_ip", self.op.secondary_ip))
6617 # this will trigger configuration file update, if needed
6618 self.cfg.Update(node, feedback_fn)
6620     # this will trigger job queue propagation or cleanup if the master candidate flag changed
6622 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6623 self.context.ReaddNode(node)
6628 class LUNodePowercycle(NoHooksLU):
6629 """Powercycles a node.
6634 def CheckArguments(self):
6635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6636 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6637 raise errors.OpPrereqError("The node is the master and the force"
6638 " parameter was not set",
6641 def ExpandNames(self):
6642 """Locking for PowercycleNode.
6644 This is a last-resort option and shouldn't block on other
6645 jobs. Therefore, we grab no locks.
6648 self.needed_locks = {}
6650 def Exec(self, feedback_fn):
6654 result = self.rpc.call_node_powercycle(self.op.node_name,
6655 self.cfg.GetHypervisorType())
6656 result.Raise("Failed to schedule the reboot")
6657 return result.payload
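# Illustrative note (not part of the original module): because ExpandNames
# grabs no locks, a powercycle opcode can be submitted even while other jobs
# hold the node; the only guard is CheckArguments above, which refuses to
# powercycle the master node unless the opcode's force parameter is set,
# e.g. (assuming the matching opcodes.OpNodePowercycle definition)
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=True)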
6660 class LUClusterQuery(NoHooksLU):
6661 """Query cluster configuration.
6666 def ExpandNames(self):
6667 self.needed_locks = {}
6669 def Exec(self, feedback_fn):
6670 """Return cluster config.
6673 cluster = self.cfg.GetClusterInfo()
6676 # Filter just for enabled hypervisors
6677 for os_name, hv_dict in cluster.os_hvp.items():
6678 os_hvp[os_name] = {}
6679 for hv_name, hv_params in hv_dict.items():
6680 if hv_name in cluster.enabled_hypervisors:
6681 os_hvp[os_name][hv_name] = hv_params
6683 # Convert ip_family to ip_version
6684 primary_ip_version = constants.IP4_VERSION
6685 if cluster.primary_ip_family == netutils.IP6Address.family:
6686 primary_ip_version = constants.IP6_VERSION
6689 "software_version": constants.RELEASE_VERSION,
6690 "protocol_version": constants.PROTOCOL_VERSION,
6691 "config_version": constants.CONFIG_VERSION,
6692 "os_api_version": max(constants.OS_API_VERSIONS),
6693 "export_version": constants.EXPORT_VERSION,
6694 "architecture": runtime.GetArchInfo(),
6695 "name": cluster.cluster_name,
6696 "master": cluster.master_node,
6697 "default_hypervisor": cluster.primary_hypervisor,
6698 "enabled_hypervisors": cluster.enabled_hypervisors,
6699 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6700 for hypervisor_name in cluster.enabled_hypervisors]),
6702 "beparams": cluster.beparams,
6703 "osparams": cluster.osparams,
6704 "ipolicy": cluster.ipolicy,
6705 "nicparams": cluster.nicparams,
6706 "ndparams": cluster.ndparams,
6707 "diskparams": cluster.diskparams,
6708 "candidate_pool_size": cluster.candidate_pool_size,
6709 "master_netdev": cluster.master_netdev,
6710 "master_netmask": cluster.master_netmask,
6711 "use_external_mip_script": cluster.use_external_mip_script,
6712 "volume_group_name": cluster.volume_group_name,
6713 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6714 "file_storage_dir": cluster.file_storage_dir,
6715 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6716 "maintain_node_health": cluster.maintain_node_health,
6717 "ctime": cluster.ctime,
6718 "mtime": cluster.mtime,
6719 "uuid": cluster.uuid,
6720 "tags": list(cluster.GetTags()),
6721 "uid_pool": cluster.uid_pool,
6722 "default_iallocator": cluster.default_iallocator,
6723 "reserved_lvs": cluster.reserved_lvs,
6724 "primary_ip_version": primary_ip_version,
6725 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6726 "hidden_os": cluster.hidden_os,
6727 "blacklisted_os": cluster.blacklisted_os,
6733 class LUClusterConfigQuery(NoHooksLU):
6734 """Return configuration values.
6739 def CheckArguments(self):
6740 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6742 def ExpandNames(self):
6743 self.cq.ExpandNames(self)
6745 def DeclareLocks(self, level):
6746 self.cq.DeclareLocks(self, level)
6748 def Exec(self, feedback_fn):
6749 result = self.cq.OldStyleQuery(self)
6751 assert len(result) == 1
6756 class _ClusterQuery(_QueryBase):
6757 FIELDS = query.CLUSTER_FIELDS
6759 #: Do not sort (there is only one item)
6762 def ExpandNames(self, lu):
6763 lu.needed_locks = {}
6765 # The following variables interact with _QueryBase._GetNames
6766 self.wanted = locking.ALL_SET
6767 self.do_locking = self.use_locking
6770 raise errors.OpPrereqError("Can not use locking for cluster queries",
6773 def DeclareLocks(self, lu, level):
6776 def _GetQueryData(self, lu):
6777 """Computes the list of nodes and their attributes.
6780 # Locking is not used
6781 assert not (compat.any(lu.glm.is_owned(level)
6782 for level in locking.LEVELS
6783 if level != locking.LEVEL_CLUSTER) or
6784 self.do_locking or self.use_locking)
6786 if query.CQ_CONFIG in self.requested_data:
6787 cluster = lu.cfg.GetClusterInfo()
6789 cluster = NotImplemented
6791 if query.CQ_QUEUE_DRAINED in self.requested_data:
6792 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6794 drain_flag = NotImplemented
6796 if query.CQ_WATCHER_PAUSE in self.requested_data:
6797 master_name = lu.cfg.GetMasterNode()
6799 result = lu.rpc.call_get_watcher_pause(master_name)
6800 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6803 watcher_pause = result.payload
6805 watcher_pause = NotImplemented
6807 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
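# Illustrative sketch (not part of the original module): a query asking only
# for job-queue related fields would produce roughly
#   query.ClusterQueryData(NotImplemented, drain_flag, NotImplemented)
# i.e. NotImplemented marks data that was never requested, not data that
# failed to load.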
6810 class LUInstanceActivateDisks(NoHooksLU):
6811 """Bring up an instance's disks.
6816 def ExpandNames(self):
6817 self._ExpandAndLockInstance()
6818 self.needed_locks[locking.LEVEL_NODE] = []
6819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6821 def DeclareLocks(self, level):
6822 if level == locking.LEVEL_NODE:
6823 self._LockInstancesNodes()
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6831 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6834 _CheckNodeOnline(self, self.instance.primary_node)
6836 def Exec(self, feedback_fn):
6837 """Activate the disks.
6840 disks_ok, disks_info = \
6841 _AssembleInstanceDisks(self, self.instance,
6842 ignore_size=self.op.ignore_size)
6844 raise errors.OpExecError("Cannot activate block devices")
6846 if self.op.wait_for_sync:
6847 if not _WaitForSync(self, self.instance):
6848 raise errors.OpExecError("Some disks of the instance are degraded!")
6853 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6855 """Prepare the block devices for an instance.
6857 This sets up the block devices on all nodes.
6859 @type lu: L{LogicalUnit}
6860 @param lu: the logical unit on whose behalf we execute
6861 @type instance: L{objects.Instance}
6862 @param instance: the instance for whose disks we assemble
6863 @type disks: list of L{objects.Disk} or None
6864 @param disks: which disks to assemble (or all, if None)
6865 @type ignore_secondaries: boolean
6866 @param ignore_secondaries: if true, errors on secondary nodes
6867 won't result in an error return from the function
6868 @type ignore_size: boolean
6869 @param ignore_size: if true, the current known size of the disk
6870 will not be used during the disk activation, useful for cases
6871 when the size is wrong
6872 @return: False if the operation failed, otherwise a list of
6873 (host, instance_visible_name, node_visible_name)
6874 with the mapping from node devices to instance devices
6879 iname = instance.name
6880 disks = _ExpandCheckDisks(instance, disks)
6882   # With the two-pass mechanism we try to reduce the window of
6883   # opportunity for the race condition of switching DRBD to primary
6884   # before handshaking occurred, but we do not eliminate it
6886 # The proper fix would be to wait (with some limits) until the
6887 # connection has been made and drbd transitions from WFConnection
6888   # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6891 # 1st pass, assemble on all nodes in secondary mode
6892 for idx, inst_disk in enumerate(disks):
6893 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6895 node_disk = node_disk.Copy()
6896 node_disk.UnsetSize()
6897 lu.cfg.SetDiskID(node_disk, node)
6898 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6900 msg = result.fail_msg
6902 is_offline_secondary = (node in instance.secondary_nodes and
6904 lu.LogWarning("Could not prepare block device %s on node %s"
6905 " (is_primary=False, pass=1): %s",
6906 inst_disk.iv_name, node, msg)
6907 if not (ignore_secondaries or is_offline_secondary):
6910 # FIXME: race condition on drbd migration to primary
6912 # 2nd pass, do only the primary node
6913 for idx, inst_disk in enumerate(disks):
6916 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6917 if node != instance.primary_node:
6920 node_disk = node_disk.Copy()
6921 node_disk.UnsetSize()
6922 lu.cfg.SetDiskID(node_disk, node)
6923 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6925 msg = result.fail_msg
6927 lu.LogWarning("Could not prepare block device %s on node %s"
6928 " (is_primary=True, pass=2): %s",
6929 inst_disk.iv_name, node, msg)
6932 dev_path = result.payload
6934 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6936 # leave the disks configured for the primary node
6937 # this is a workaround that would be fixed better by
6938 # improving the logical/physical id handling
6940 lu.cfg.SetDiskID(disk, instance.primary_node)
6942 return disks_ok, device_info
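# Illustrative sketch (hypothetical names, not part of the original module):
# on success the function returns something like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. the disks_ok flag plus (primary node, instance-visible name, node
# device path) tuples; a failed assemble on the primary sets disks_ok to
# False instead.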
6945 def _StartInstanceDisks(lu, instance, force):
6946 """Start the disks of an instance.
6949 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6950 ignore_secondaries=force)
6952 _ShutdownInstanceDisks(lu, instance)
6953 if force is not None and not force:
6955 hint=("If the message above refers to a secondary node,"
6956 " you can retry the operation using '--force'"))
6957 raise errors.OpExecError("Disk consistency error")
6960 class LUInstanceDeactivateDisks(NoHooksLU):
6961 """Shutdown an instance's disks.
6966 def ExpandNames(self):
6967 self._ExpandAndLockInstance()
6968 self.needed_locks[locking.LEVEL_NODE] = []
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6971 def DeclareLocks(self, level):
6972 if level == locking.LEVEL_NODE:
6973 self._LockInstancesNodes()
6975 def CheckPrereq(self):
6976 """Check prerequisites.
6978 This checks that the instance is in the cluster.
6981 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6982 assert self.instance is not None, \
6983 "Cannot retrieve locked instance %s" % self.op.instance_name
6985 def Exec(self, feedback_fn):
6986 """Deactivate the disks
6989 instance = self.instance
6991 _ShutdownInstanceDisks(self, instance)
6993 _SafeShutdownInstanceDisks(self, instance)
6996 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6997 """Shutdown block devices of an instance.
6999 This function checks if an instance is running, before calling
7000 _ShutdownInstanceDisks.
7003 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7004 _ShutdownInstanceDisks(lu, instance, disks=disks)
7007 def _ExpandCheckDisks(instance, disks):
7008 """Return the instance disks selected by the disks list
7010 @type disks: list of L{objects.Disk} or None
7011 @param disks: selected disks
7012 @rtype: list of L{objects.Disk}
7013 @return: selected instance disks to act on
7017 return instance.disks
7019 if not set(disks).issubset(instance.disks):
7020     raise errors.ProgrammerError("Can only act on disks belonging to the" " target instance")
7025 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7026 """Shutdown block devices of an instance.
7028 This does the shutdown on all nodes of the instance.
7030 If the ignore_primary is false, errors on the primary node are
7035 disks = _ExpandCheckDisks(instance, disks)
7038 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7039 lu.cfg.SetDiskID(top_disk, node)
7040 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7041 msg = result.fail_msg
7043 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7044 disk.iv_name, node, msg)
7045 if ((node == instance.primary_node and not ignore_primary) or
7046 (node != instance.primary_node and not result.offline)):
7051 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7052 """Checks if a node has enough free memory.
7054 This function checks if a given node has the needed amount of free
7055 memory. In case the node has less memory or we cannot get the
7056 information from the node, this function raises an OpPrereqError
7059 @type lu: C{LogicalUnit}
7060 @param lu: a logical unit from which we get configuration data
7062 @param node: the node to check
7063 @type reason: C{str}
7064 @param reason: string to use in the error message
7065 @type requested: C{int}
7066 @param requested: the amount of memory in MiB to check for
7067 @type hypervisor_name: C{str}
7068 @param hypervisor_name: the hypervisor to ask for memory stats
7070 @return: node current free memory
7071 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7072 we cannot check the node
7075 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7076 nodeinfo[node].Raise("Can't get data from node %s" % node,
7077 prereq=True, ecode=errors.ECODE_ENVIRON)
7078 (_, _, (hv_info, )) = nodeinfo[node].payload
7080 free_mem = hv_info.get("memory_free", None)
7081 if not isinstance(free_mem, int):
7082 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7083 " was '%s'" % (node, free_mem),
7084 errors.ECODE_ENVIRON)
7085 if requested > free_mem:
7086 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7087 " needed %s MiB, available %s MiB" %
7088 (node, reason, requested, free_mem),
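# Illustrative usage sketch (hypothetical values, not part of the original
# module): an instance-start LU calls roughly
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# and only proceeds if the node reports at least that many MiB of free memory
# for the given hypervisor.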
7093 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7094 """Checks if nodes have enough free disk space in all the VGs.
7096 This function checks if all given nodes have the needed amount of
7097 free disk. In case any node has less disk or we cannot get the
7098 information from the node, this function raises an OpPrereqError
7101 @type lu: C{LogicalUnit}
7102 @param lu: a logical unit from which we get configuration data
7103 @type nodenames: C{list}
7104 @param nodenames: the list of node names to check
7105 @type req_sizes: C{dict}
7106 @param req_sizes: the hash of vg and corresponding amount of disk in
7108 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7109 or we cannot check the node
7112 for vg, req_size in req_sizes.items():
7113 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
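# Illustrative sketch (hypothetical values, not part of the original module):
# req_sizes maps a volume group name to the disk space required in MiB, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 128})
# simply runs _CheckNodesFreeDiskOnVG once per volume group.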
7116 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7117 """Checks if nodes have enough free disk space in the specified VG.
7119 This function checks if all given nodes have the needed amount of
7120 free disk. In case any node has less disk or we cannot get the
7121 information from the node, this function raises an OpPrereqError
7124 @type lu: C{LogicalUnit}
7125 @param lu: a logical unit from which we get configuration data
7126 @type nodenames: C{list}
7127 @param nodenames: the list of node names to check
7129 @param vg: the volume group to check
7130 @type requested: C{int}
7131 @param requested: the amount of disk in MiB to check for
7132 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7133 or we cannot check the node
7136 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7137 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7138 for node in nodenames:
7139 info = nodeinfo[node]
7140 info.Raise("Cannot get current information from node %s" % node,
7141 prereq=True, ecode=errors.ECODE_ENVIRON)
7142 (_, (vg_info, ), _) = info.payload
7143 vg_free = vg_info.get("vg_free", None)
7144 if not isinstance(vg_free, int):
7145 raise errors.OpPrereqError("Can't compute free disk space on node"
7146 " %s for vg %s, result was '%s'" %
7147 (node, vg, vg_free), errors.ECODE_ENVIRON)
7148 if requested > vg_free:
7149 raise errors.OpPrereqError("Not enough disk space on target node %s"
7150 " vg %s: required %d MiB, available %d MiB" %
7151 (node, vg, requested, vg_free),
7155 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7156 """Checks if nodes have enough physical CPUs
7158 This function checks if all given nodes have the needed number of
7159 physical CPUs. In case any node has less CPUs or we cannot get the
7160 information from the node, this function raises an OpPrereqError
7163 @type lu: C{LogicalUnit}
7164 @param lu: a logical unit from which we get configuration data
7165 @type nodenames: C{list}
7166 @param nodenames: the list of node names to check
7167 @type requested: C{int}
7168 @param requested: the minimum acceptable number of physical CPUs
7169 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7170 or we cannot check the node
7173 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7174 for node in nodenames:
7175 info = nodeinfo[node]
7176 info.Raise("Cannot get current information from node %s" % node,
7177 prereq=True, ecode=errors.ECODE_ENVIRON)
7178 (_, _, (hv_info, )) = info.payload
7179 num_cpus = hv_info.get("cpu_total", None)
7180 if not isinstance(num_cpus, int):
7181 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7182 " on node %s, result was '%s'" %
7183 (node, num_cpus), errors.ECODE_ENVIRON)
7184 if requested > num_cpus:
7185 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7186 "required" % (node, num_cpus, requested),
7190 class LUInstanceStartup(LogicalUnit):
7191 """Starts an instance.
7194 HPATH = "instance-start"
7195 HTYPE = constants.HTYPE_INSTANCE
7198 def CheckArguments(self):
7200 if self.op.beparams:
7201 # fill the beparams dict
7202 objects.UpgradeBeParams(self.op.beparams)
7203 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7205 def ExpandNames(self):
7206 self._ExpandAndLockInstance()
7207 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7209 def DeclareLocks(self, level):
7210 if level == locking.LEVEL_NODE_RES:
7211 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7213 def BuildHooksEnv(self):
7216 This runs on master, primary and secondary nodes of the instance.
7220 "FORCE": self.op.force,
7223 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7227 def BuildHooksNodes(self):
7228 """Build hooks nodes.
7231 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7234 def CheckPrereq(self):
7235 """Check prerequisites.
7237 This checks that the instance is in the cluster.
7240 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7241 assert self.instance is not None, \
7242 "Cannot retrieve locked instance %s" % self.op.instance_name
7245 if self.op.hvparams:
7246 # check hypervisor parameter syntax (locally)
7247 cluster = self.cfg.GetClusterInfo()
7248 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7249 filled_hvp = cluster.FillHV(instance)
7250 filled_hvp.update(self.op.hvparams)
7251 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7252 hv_type.CheckParameterSyntax(filled_hvp)
7253 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7255 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7257 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7259 if self.primary_offline and self.op.ignore_offline_nodes:
7260 self.LogWarning("Ignoring offline primary node")
7262 if self.op.hvparams or self.op.beparams:
7263 self.LogWarning("Overridden parameters are ignored")
7265 _CheckNodeOnline(self, instance.primary_node)
7267 bep = self.cfg.GetClusterInfo().FillBE(instance)
7268 bep.update(self.op.beparams)
7270 # check bridges existence
7271 _CheckInstanceBridgesExist(self, instance)
7273 remote_info = self.rpc.call_instance_info(instance.primary_node,
7275 instance.hypervisor)
7276 remote_info.Raise("Error checking node %s" % instance.primary_node,
7277 prereq=True, ecode=errors.ECODE_ENVIRON)
7278 if not remote_info.payload: # not running already
7279 _CheckNodeFreeMemory(self, instance.primary_node,
7280 "starting instance %s" % instance.name,
7281 bep[constants.BE_MINMEM], instance.hypervisor)
7283 def Exec(self, feedback_fn):
7284 """Start the instance.
7287 instance = self.instance
7288 force = self.op.force
7290 if not self.op.no_remember:
7291 self.cfg.MarkInstanceUp(instance.name)
7293 if self.primary_offline:
7294 assert self.op.ignore_offline_nodes
7295 self.LogInfo("Primary node offline, marked instance as started")
7297 node_current = instance.primary_node
7299 _StartInstanceDisks(self, instance, force)
7302       result = self.rpc.call_instance_start(node_current,
7303 (instance, self.op.hvparams,
7305 self.op.startup_paused)
7306 msg = result.fail_msg
7308 _ShutdownInstanceDisks(self, instance)
7309 raise errors.OpExecError("Could not start instance: %s" % msg)
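# Illustrative note (not part of the original module): the startup flow above
# is roughly
#   1. record the new admin state unless no_remember was given,
#   2. if the primary node is offline (and ignore_offline_nodes allowed it),
#      stop after marking the instance as started,
#   3. otherwise assemble the disks and start the instance, shutting the disks
#      down again if the hypervisor call fails.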
7312 class LUInstanceReboot(LogicalUnit):
7313 """Reboot an instance.
7316 HPATH = "instance-reboot"
7317 HTYPE = constants.HTYPE_INSTANCE
7320 def ExpandNames(self):
7321 self._ExpandAndLockInstance()
7323 def BuildHooksEnv(self):
7326 This runs on master, primary and secondary nodes of the instance.
7330 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7331 "REBOOT_TYPE": self.op.reboot_type,
7332 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7339 def BuildHooksNodes(self):
7340 """Build hooks nodes.
7343 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7346 def CheckPrereq(self):
7347 """Check prerequisites.
7349 This checks that the instance is in the cluster.
7352 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7353 assert self.instance is not None, \
7354 "Cannot retrieve locked instance %s" % self.op.instance_name
7355 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7356 _CheckNodeOnline(self, instance.primary_node)
7358 # check bridges existence
7359 _CheckInstanceBridgesExist(self, instance)
7361 def Exec(self, feedback_fn):
7362 """Reboot the instance.
7365 instance = self.instance
7366 ignore_secondaries = self.op.ignore_secondaries
7367 reboot_type = self.op.reboot_type
7369 remote_info = self.rpc.call_instance_info(instance.primary_node,
7371 instance.hypervisor)
7372 remote_info.Raise("Error checking node %s" % instance.primary_node)
7373 instance_running = bool(remote_info.payload)
7375 node_current = instance.primary_node
7377 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7378 constants.INSTANCE_REBOOT_HARD]:
7379 for disk in instance.disks:
7380 self.cfg.SetDiskID(disk, node_current)
7381 result = self.rpc.call_instance_reboot(node_current, instance,
7383 self.op.shutdown_timeout)
7384 result.Raise("Could not reboot instance")
7386 if instance_running:
7387 result = self.rpc.call_instance_shutdown(node_current, instance,
7388 self.op.shutdown_timeout)
7389 result.Raise("Could not shutdown instance for full reboot")
7390 _ShutdownInstanceDisks(self, instance)
7392       self.LogInfo("Instance %s was already stopped, starting now", instance.name)
7394 _StartInstanceDisks(self, instance, ignore_secondaries)
7395 result = self.rpc.call_instance_start(node_current,
7396 (instance, None, None), False)
7397 msg = result.fail_msg
7399 _ShutdownInstanceDisks(self, instance)
7400 raise errors.OpExecError("Could not start instance for"
7401 " full reboot: %s" % msg)
7403 self.cfg.MarkInstanceUp(instance.name)
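# Illustrative note (not part of the original module): soft and hard reboots of
# a running instance are delegated to a single instance_reboot RPC call, while
# a full reboot (or a "reboot" of a stopped instance) is implemented above as
# shutdown + disk deactivation, then disk activation + start.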
7406 class LUInstanceShutdown(LogicalUnit):
7407 """Shutdown an instance.
7410 HPATH = "instance-stop"
7411 HTYPE = constants.HTYPE_INSTANCE
7414 def ExpandNames(self):
7415 self._ExpandAndLockInstance()
7417 def BuildHooksEnv(self):
7420 This runs on master, primary and secondary nodes of the instance.
7423 env = _BuildInstanceHookEnvByObject(self, self.instance)
7424 env["TIMEOUT"] = self.op.timeout
7427 def BuildHooksNodes(self):
7428 """Build hooks nodes.
7431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7434 def CheckPrereq(self):
7435 """Check prerequisites.
7437 This checks that the instance is in the cluster.
7440 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7441 assert self.instance is not None, \
7442 "Cannot retrieve locked instance %s" % self.op.instance_name
7444 if not self.op.force:
7445 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7447 self.LogWarning("Ignoring offline instance check")
7449 self.primary_offline = \
7450 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7452 if self.primary_offline and self.op.ignore_offline_nodes:
7453 self.LogWarning("Ignoring offline primary node")
7455 _CheckNodeOnline(self, self.instance.primary_node)
7457 def Exec(self, feedback_fn):
7458 """Shutdown the instance.
7461 instance = self.instance
7462 node_current = instance.primary_node
7463 timeout = self.op.timeout
7465 # If the instance is offline we shouldn't mark it as down, as that
7466 # resets the offline flag.
7467 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7468 self.cfg.MarkInstanceDown(instance.name)
7470 if self.primary_offline:
7471 assert self.op.ignore_offline_nodes
7472 self.LogInfo("Primary node offline, marked instance as stopped")
7474 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7475 msg = result.fail_msg
7477 self.LogWarning("Could not shutdown instance: %s", msg)
7479 _ShutdownInstanceDisks(self, instance)
7482 class LUInstanceReinstall(LogicalUnit):
7483 """Reinstall an instance.
7486 HPATH = "instance-reinstall"
7487 HTYPE = constants.HTYPE_INSTANCE
7490 def ExpandNames(self):
7491 self._ExpandAndLockInstance()
7493 def BuildHooksEnv(self):
7496 This runs on master, primary and secondary nodes of the instance.
7499 return _BuildInstanceHookEnvByObject(self, self.instance)
7501 def BuildHooksNodes(self):
7502 """Build hooks nodes.
7505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7506 return (nl, nl)
7508 def CheckPrereq(self):
7509 """Check prerequisites.
7511 This checks that the instance is in the cluster and is not running.
7514 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7515 assert instance is not None, \
7516 "Cannot retrieve locked instance %s" % self.op.instance_name
7517 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7518 " offline, cannot reinstall")
7520 if instance.disk_template == constants.DT_DISKLESS:
7521 raise errors.OpPrereqError("Instance '%s' has no disks" %
7522 self.op.instance_name,
7524 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7526 if self.op.os_type is not None:
7528 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7529 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7530 instance_os = self.op.os_type
7531 else:
7532 instance_os = instance.os
7534 nodelist = list(instance.all_nodes)
7536 if self.op.osparams:
7537 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7538 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7539 self.os_inst = i_osdict # the new dict (without defaults)
7540 else:
7541 self.os_inst = {}
7543 self.instance = instance
7545 def Exec(self, feedback_fn):
7546 """Reinstall the instance.
7549 inst = self.instance
7551 if self.op.os_type is not None:
7552 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7553 inst.os = self.op.os_type
7554 # Write to configuration
7555 self.cfg.Update(inst, feedback_fn)
7557 _StartInstanceDisks(self, inst, None)
7558 try:
7559 feedback_fn("Running the instance OS create scripts...")
7560 # FIXME: pass debug option from opcode to backend
7561 result = self.rpc.call_instance_os_add(inst.primary_node,
7562 (inst, self.os_inst), True,
7563 self.op.debug_level)
7564 result.Raise("Could not install OS for instance %s on node %s" %
7565 (inst.name, inst.primary_node))
7566 finally:
7567 _ShutdownInstanceDisks(self, inst)
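# The disks are only activated for the duration of the OS create scripts and
# are shut down again in the finally clause, whether or not the reinstall
# succeeded. A typical invocation of this LU from the command line (not shown
# in this module) would be something like "gnt-instance reinstall -o <os> <instance>".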
7570 class LUInstanceRecreateDisks(LogicalUnit):
7571 """Recreate an instance's missing disks.
7574 HPATH = "instance-recreate-disks"
7575 HTYPE = constants.HTYPE_INSTANCE
7578 _MODIFYABLE = compat.UniqueFrozenset([
7579 constants.IDISK_SIZE,
7580 constants.IDISK_MODE,
7583 # New or changed disk parameters may have different semantics
7584 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7585 constants.IDISK_ADOPT,
7587 # TODO: Implement support changing VG while recreating
7589 constants.IDISK_METAVG,
7590 constants.IDISK_PROVIDER,
7593 def _RunAllocator(self):
7594 """Run the allocator based on input opcode.
7597 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7600 # The allocator should actually run in "relocate" mode, but current
7601 # allocators don't support relocating all the nodes of an instance at
7602 # the same time. As a workaround we use "allocate" mode, but this is
7603 # suboptimal for two reasons:
7604 # - The instance name passed to the allocator is present in the list of
7605 # existing instances, so there could be a conflict within the
7606 # internal structures of the allocator. This doesn't happen with the
7607 # current allocators, but it's a liability.
7608 # - The allocator counts the resources used by the instance twice: once
7609 # because the instance exists already, and once because it tries to
7610 # allocate a new instance.
7611 # The allocator could choose some of the nodes on which the instance is
7612 # running, but that's not a problem. If the instance nodes are broken,
7613 # they should already be marked as drained or offline, and hence
7614 # skipped by the allocator. If instance disks have been lost for other
7615 # reasons, then recreating the disks on the same nodes should be fine.
7616 disk_template = self.instance.disk_template
7617 spindle_use = be_full[constants.BE_SPINDLE_USE]
7618 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7619 disk_template=disk_template,
7620 tags=list(self.instance.GetTags()),
7621 os=self.instance.os,
7623 vcpus=be_full[constants.BE_VCPUS],
7624 memory=be_full[constants.BE_MAXMEM],
7625 spindle_use=spindle_use,
7626 disks=[{constants.IDISK_SIZE: d.size,
7627 constants.IDISK_MODE: d.mode}
7628 for d in self.instance.disks],
7629 hypervisor=self.instance.hypervisor,
7630 node_whitelist=None)
7631 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7633 ial.Run(self.op.iallocator)
7635 assert req.RequiredNodes() == len(self.instance.all_nodes)
7637 if not ial.success:
7638 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7639 " %s" % (self.op.iallocator, ial.info),
7640 errors.ECODE_NORES)
7642 self.op.nodes = ial.result
7643 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7644 self.op.instance_name, self.op.iallocator,
7645 utils.CommaJoin(ial.result))
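# After a successful allocator run self.op.nodes simply contains the node
# names chosen by the allocator (e.g. two names for a DRBD instance), and the
# rest of CheckPrereq treats them exactly like explicitly requested nodes.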
7647 def CheckArguments(self):
7648 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7649 # Normalize and convert deprecated list of disk indices
7650 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
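# For example, a deprecated "disks" value of [2, 0, 2] is normalized above to
# [(0, {}), (2, {})]: indices are de-duplicated, sorted and paired with an
# empty override dict.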
7652 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7653 if duplicates:
7654 raise errors.OpPrereqError("Some disks have been specified more than"
7655 " once: %s" % utils.CommaJoin(duplicates),
7656 errors.ECODE_INVAL)
7658 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7659 # when neither iallocator nor nodes are specified
7660 if self.op.iallocator or self.op.nodes:
7661 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7663 for (idx, params) in self.op.disks:
7664 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7665 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7666 if unsupported:
7667 raise errors.OpPrereqError("Parameters for disk %s try to change"
7668 " unmodifiable parameter(s): %s" %
7669 (idx, utils.CommaJoin(unsupported)),
7672 def ExpandNames(self):
7673 self._ExpandAndLockInstance()
7674 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7676 if self.op.nodes:
7677 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7678 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7679 else:
7680 self.needed_locks[locking.LEVEL_NODE] = []
7681 if self.op.iallocator:
7682 # iallocator will select a new node in the same group
7683 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7684 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7686 self.needed_locks[locking.LEVEL_NODE_RES] = []
7688 def DeclareLocks(self, level):
7689 if level == locking.LEVEL_NODEGROUP:
7690 assert self.op.iallocator is not None
7691 assert not self.op.nodes
7692 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7693 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7694 # Lock the primary group used by the instance optimistically; this
7695 # requires going via the node before it's locked, requiring
7696 # verification later on
7697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7698 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7700 elif level == locking.LEVEL_NODE:
7701 # If an allocator is used, then we lock all the nodes in the current
7702 # instance group, as we don't know yet which ones will be selected;
7703 # if we replace the nodes without using an allocator, locks are
7704 # already declared in ExpandNames; otherwise, we need to lock all the
7705 # instance nodes for disk re-creation
7706 if self.op.iallocator:
7707 assert not self.op.nodes
7708 assert not self.needed_locks[locking.LEVEL_NODE]
7709 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7711 # Lock member nodes of the group of the primary node
7712 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7713 self.needed_locks[locking.LEVEL_NODE].extend(
7714 self.cfg.GetNodeGroup(group_uuid).members)
7716 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7717 elif not self.op.nodes:
7718 self._LockInstancesNodes(primary_only=False)
7719 elif level == locking.LEVEL_NODE_RES:
7721 self.needed_locks[locking.LEVEL_NODE_RES] = \
7722 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7724 def BuildHooksEnv(self):
7727 This runs on master, primary and secondary nodes of the instance.
7730 return _BuildInstanceHookEnvByObject(self, self.instance)
7732 def BuildHooksNodes(self):
7733 """Build hooks nodes.
7736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7737 return (nl, nl)
7739 def CheckPrereq(self):
7740 """Check prerequisites.
7742 This checks that the instance is in the cluster and is not running.
7745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7746 assert instance is not None, \
7747 "Cannot retrieve locked instance %s" % self.op.instance_name
7748 if self.op.nodes:
7749 if len(self.op.nodes) != len(instance.all_nodes):
7750 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7751 " %d replacement nodes were specified" %
7752 (instance.name, len(instance.all_nodes),
7753 len(self.op.nodes)),
7755 assert instance.disk_template != constants.DT_DRBD8 or \
7756 len(self.op.nodes) == 2
7757 assert instance.disk_template != constants.DT_PLAIN or \
7758 len(self.op.nodes) == 1
7759 primary_node = self.op.nodes[0]
7760 else:
7761 primary_node = instance.primary_node
7762 if not self.op.iallocator:
7763 _CheckNodeOnline(self, primary_node)
7765 if instance.disk_template == constants.DT_DISKLESS:
7766 raise errors.OpPrereqError("Instance '%s' has no disks" %
7767 self.op.instance_name, errors.ECODE_INVAL)
7769 # Verify if node group locks are still correct
7770 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7771 if owned_groups:
7772 # Node group locks are acquired only for the primary node (and only
7773 # when the allocator is used)
7774 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7775 primary_only=True)
7777 # if we replace nodes *and* the old primary is offline, we don't
7778 # check the instance state
7779 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7780 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7781 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7782 msg="cannot recreate disks")
7784 if self.op.disks:
7785 self.disks = dict(self.op.disks)
7786 else:
7787 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7789 maxidx = max(self.disks.keys())
7790 if maxidx >= len(instance.disks):
7791 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7794 if ((self.op.nodes or self.op.iallocator) and
7795 sorted(self.disks.keys()) != range(len(instance.disks))):
7796 raise errors.OpPrereqError("Can't recreate disks partially and"
7797 " change the nodes at the same time",
7800 self.instance = instance
7802 if self.op.iallocator:
7803 self._RunAllocator()
7804 # Release unneeded node and node resource locks
7805 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7806 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7807 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7809 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7811 def Exec(self, feedback_fn):
7812 """Recreate the disks.
7815 instance = self.instance
7817 assert (self.owned_locks(locking.LEVEL_NODE) ==
7818 self.owned_locks(locking.LEVEL_NODE_RES))
7820 to_skip = []
7821 mods = [] # keeps track of needed changes
7823 for idx, disk in enumerate(instance.disks):
7824 try:
7825 changes = self.disks[idx]
7826 except KeyError:
7827 # Disk should not be recreated
7828 to_skip.append(idx)
7829 continue
7831 # update secondaries for disks, if needed
7832 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7833 # need to update the nodes and minors
7834 assert len(self.op.nodes) == 2
7835 assert len(disk.logical_id) == 6 # otherwise disk internals
7837 (_, _, old_port, _, _, old_secret) = disk.logical_id
7838 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7839 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7840 new_minors[0], new_minors[1], old_secret)
7841 assert len(disk.logical_id) == len(new_id)
7842 else:
7843 new_id = None
7845 mods.append((idx, new_id, changes))
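# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b,
# shared_secret); only the node names and minors are replaced here, the port
# and secret of the existing disk are reused.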
7847 # now that we have passed all asserts above, we can apply the mods
7848 # in a single run (to avoid partial changes)
7849 for idx, new_id, changes in mods:
7850 disk = instance.disks[idx]
7851 if new_id is not None:
7852 assert disk.dev_type == constants.LD_DRBD8
7853 disk.logical_id = new_id
7854 if changes:
7855 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7856 mode=changes.get(constants.IDISK_MODE, None))
7858 # change primary node, if needed
7859 if self.op.nodes:
7860 instance.primary_node = self.op.nodes[0]
7861 self.LogWarning("Changing the instance's nodes, you will have to"
7862 " remove any disks left on the older nodes manually")
7865 self.cfg.Update(instance, feedback_fn)
7867 # All touched nodes must be locked
7868 mylocks = self.owned_locks(locking.LEVEL_NODE)
7869 assert mylocks.issuperset(frozenset(instance.all_nodes))
7870 _CreateDisks(self, instance, to_skip=to_skip)
7873 class LUInstanceRename(LogicalUnit):
7874 """Rename an instance.
7877 HPATH = "instance-rename"
7878 HTYPE = constants.HTYPE_INSTANCE
7880 def CheckArguments(self):
7884 if self.op.ip_check and not self.op.name_check:
7885 # TODO: make the ip check more flexible and not depend on the name check
7886 raise errors.OpPrereqError("IP address check requires a name check",
7889 def BuildHooksEnv(self):
7892 This runs on master, primary and secondary nodes of the instance.
7895 env = _BuildInstanceHookEnvByObject(self, self.instance)
7896 env["INSTANCE_NEW_NAME"] = self.op.new_name
7897 return env
7899 def BuildHooksNodes(self):
7900 """Build hooks nodes.
7903 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7904 return (nl, nl)
7906 def CheckPrereq(self):
7907 """Check prerequisites.
7909 This checks that the instance is in the cluster and is not running.
7912 self.op.instance_name = _ExpandInstanceName(self.cfg,
7913 self.op.instance_name)
7914 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7915 assert instance is not None
7916 _CheckNodeOnline(self, instance.primary_node)
7917 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7918 msg="cannot rename")
7919 self.instance = instance
7921 new_name = self.op.new_name
7922 if self.op.name_check:
7923 hostname = _CheckHostnameSane(self, new_name)
7924 new_name = self.op.new_name = hostname.name
7925 if (self.op.ip_check and
7926 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7927 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7928 (hostname.ip, new_name),
7929 errors.ECODE_NOTUNIQUE)
7931 instance_list = self.cfg.GetInstanceList()
7932 if new_name in instance_list and new_name != instance.name:
7933 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7934 new_name, errors.ECODE_EXISTS)
7936 def Exec(self, feedback_fn):
7937 """Rename the instance.
7940 inst = self.instance
7941 old_name = inst.name
7943 rename_file_storage = False
7944 if (inst.disk_template in constants.DTS_FILEBASED and
7945 self.op.new_name != inst.name):
7946 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7947 rename_file_storage = True
7949 self.cfg.RenameInstance(inst.name, self.op.new_name)
7950 # Change the instance lock. This is definitely safe while we hold the BGL.
7951 # Otherwise the new lock would have to be added in acquired mode.
7953 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7954 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7955 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7957 # re-read the instance from the configuration after rename
7958 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7960 if rename_file_storage:
7961 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7963 old_file_storage_dir,
7964 new_file_storage_dir)
7965 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7966 " (but the instance has been renamed in Ganeti)" %
7967 (inst.primary_node, old_file_storage_dir,
7968 new_file_storage_dir))
7970 _StartInstanceDisks(self, inst, None)
7971 # update info on disks
7972 info = _GetInstanceInfoText(inst)
7973 for (idx, disk) in enumerate(inst.disks):
7974 for node in inst.all_nodes:
7975 self.cfg.SetDiskID(disk, node)
7976 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7977 if result.fail_msg:
7978 self.LogWarning("Error setting info on node %s for disk %s: %s",
7979 node, idx, result.fail_msg)
7980 try:
7981 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7982 old_name, self.op.debug_level)
7983 msg = result.fail_msg
7984 if msg:
7985 msg = ("Could not run OS rename script for instance %s on node %s"
7986 " (but the instance has been renamed in Ganeti): %s" %
7987 (inst.name, inst.primary_node, msg))
7988 self.LogWarning(msg)
7989 finally:
7990 _ShutdownInstanceDisks(self, inst)
7992 return inst.name
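# The configuration rename happens before the file storage directory and the
# OS rename script are touched, which is why later failures are reported as
# warnings that explicitly mention that the instance has already been renamed
# in Ganeti.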
7995 class LUInstanceRemove(LogicalUnit):
7996 """Remove an instance.
7999 HPATH = "instance-remove"
8000 HTYPE = constants.HTYPE_INSTANCE
8003 def ExpandNames(self):
8004 self._ExpandAndLockInstance()
8005 self.needed_locks[locking.LEVEL_NODE] = []
8006 self.needed_locks[locking.LEVEL_NODE_RES] = []
8007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8009 def DeclareLocks(self, level):
8010 if level == locking.LEVEL_NODE:
8011 self._LockInstancesNodes()
8012 elif level == locking.LEVEL_NODE_RES:
8014 self.needed_locks[locking.LEVEL_NODE_RES] = \
8015 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8017 def BuildHooksEnv(self):
8020 This runs on master, primary and secondary nodes of the instance.
8023 env = _BuildInstanceHookEnvByObject(self, self.instance)
8024 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8025 return env
8027 def BuildHooksNodes(self):
8028 """Build hooks nodes.
8031 nl = [self.cfg.GetMasterNode()]
8032 nl_post = list(self.instance.all_nodes) + nl
8033 return (nl, nl_post)
8035 def CheckPrereq(self):
8036 """Check prerequisites.
8038 This checks that the instance is in the cluster.
8041 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8042 assert self.instance is not None, \
8043 "Cannot retrieve locked instance %s" % self.op.instance_name
8045 def Exec(self, feedback_fn):
8046 """Remove the instance.
8049 instance = self.instance
8050 logging.info("Shutting down instance %s on node %s",
8051 instance.name, instance.primary_node)
8053 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8054 self.op.shutdown_timeout)
8055 msg = result.fail_msg
8056 if msg:
8057 if self.op.ignore_failures:
8058 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8059 else:
8060 raise errors.OpExecError("Could not shutdown instance %s on"
8061 " node %s: %s" %
8062 (instance.name, instance.primary_node, msg))
8064 assert (self.owned_locks(locking.LEVEL_NODE) ==
8065 self.owned_locks(locking.LEVEL_NODE_RES))
8066 assert not (set(instance.all_nodes) -
8067 self.owned_locks(locking.LEVEL_NODE)), \
8068 "Not owning correct locks"
8070 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8073 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8074 """Utility function to remove an instance.
8077 logging.info("Removing block devices for instance %s", instance.name)
8079 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8080 if not ignore_failures:
8081 raise errors.OpExecError("Can't remove instance's disks")
8082 feedback_fn("Warning: can't remove instance's disks")
8084 logging.info("Removing instance %s out of cluster config", instance.name)
8086 lu.cfg.RemoveInstance(instance.name)
8088 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8089 "Instance lock removal conflict"
8091 # Remove lock for the instance
8092 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
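# Scheduling the instance lock for removal via lu.remove_locks (instead of
# just releasing it) ensures the lock object itself disappears together with
# the instance once the LU finishes.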
8095 class LUInstanceQuery(NoHooksLU):
8096 """Logical unit for querying instances.
8099 # pylint: disable=W0142
8102 def CheckArguments(self):
8103 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8104 self.op.output_fields, self.op.use_locking)
8106 def ExpandNames(self):
8107 self.iq.ExpandNames(self)
8109 def DeclareLocks(self, level):
8110 self.iq.DeclareLocks(self, level)
8112 def Exec(self, feedback_fn):
8113 return self.iq.OldStyleQuery(self)
8116 def _ExpandNamesForMigration(lu):
8117 """Expands names for use with L{TLMigrateInstance}.
8119 @type lu: L{LogicalUnit}
8122 if lu.op.target_node is not None:
8123 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8125 lu.needed_locks[locking.LEVEL_NODE] = []
8126 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8128 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8129 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8131 # The node allocation lock is actually only needed for replicated instances
8132 # (e.g. DRBD8) and if an iallocator is used.
8133 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8136 def _DeclareLocksForMigration(lu, level):
8137 """Declares locks for L{TLMigrateInstance}.
8139 @type lu: L{LogicalUnit}
8140 @param level: Lock level
8143 if level == locking.LEVEL_NODE_ALLOC:
8144 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8146 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8148 # Node locks are already declared here rather than at LEVEL_NODE as we need
8149 # the instance object anyway to declare the node allocation lock.
8150 if instance.disk_template in constants.DTS_EXT_MIRROR:
8151 if lu.op.target_node is None:
8152 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8154 else:
8155 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8156 lu.op.target_node]
8157 del lu.recalculate_locks[locking.LEVEL_NODE]
8158 else:
8159 lu._LockInstancesNodes() # pylint: disable=W0212
8161 elif level == locking.LEVEL_NODE:
8162 # Node locks are declared together with the node allocation lock
8163 assert (lu.needed_locks[locking.LEVEL_NODE] or
8164 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8166 elif level == locking.LEVEL_NODE_RES:
8168 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8169 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8172 class LUInstanceFailover(LogicalUnit):
8173 """Failover an instance.
8176 HPATH = "instance-failover"
8177 HTYPE = constants.HTYPE_INSTANCE
8180 def CheckArguments(self):
8181 """Check the arguments.
8184 self.iallocator = getattr(self.op, "iallocator", None)
8185 self.target_node = getattr(self.op, "target_node", None)
8187 def ExpandNames(self):
8188 self._ExpandAndLockInstance()
8189 _ExpandNamesForMigration(self)
8191 self._migrater = \
8192 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8193 self.op.ignore_consistency, True,
8194 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8196 self.tasklets = [self._migrater]
8198 def DeclareLocks(self, level):
8199 _DeclareLocksForMigration(self, level)
8201 def BuildHooksEnv(self):
8204 This runs on master, primary and secondary nodes of the instance.
8207 instance = self._migrater.instance
8208 source_node = instance.primary_node
8209 target_node = self.op.target_node
8210 env = {
8211 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8213 "OLD_PRIMARY": source_node,
8214 "NEW_PRIMARY": target_node,
8215 }
8217 if instance.disk_template in constants.DTS_INT_MIRROR:
8218 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8219 env["NEW_SECONDARY"] = source_node
8220 else:
8221 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8223 env.update(_BuildInstanceHookEnvByObject(self, instance))
8225 return env
8227 def BuildHooksNodes(self):
8228 """Build hooks nodes.
8231 instance = self._migrater.instance
8232 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8233 return (nl, nl + [instance.primary_node])
8236 class LUInstanceMigrate(LogicalUnit):
8237 """Migrate an instance.
8239 This is migration without shutting down, compared to the failover,
8240 which is done with shutdown.
8243 HPATH = "instance-migrate"
8244 HTYPE = constants.HTYPE_INSTANCE
8247 def ExpandNames(self):
8248 self._ExpandAndLockInstance()
8249 _ExpandNamesForMigration(self)
8251 self._migrater = \
8252 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8253 False, self.op.allow_failover, False,
8254 self.op.allow_runtime_changes,
8255 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8256 self.op.ignore_ipolicy)
8258 self.tasklets = [self._migrater]
8260 def DeclareLocks(self, level):
8261 _DeclareLocksForMigration(self, level)
8263 def BuildHooksEnv(self):
8266 This runs on master, primary and secondary nodes of the instance.
8269 instance = self._migrater.instance
8270 source_node = instance.primary_node
8271 target_node = self.op.target_node
8272 env = _BuildInstanceHookEnvByObject(self, instance)
8273 env.update({
8274 "MIGRATE_LIVE": self._migrater.live,
8275 "MIGRATE_CLEANUP": self.op.cleanup,
8276 "OLD_PRIMARY": source_node,
8277 "NEW_PRIMARY": target_node,
8278 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8279 })
8281 if instance.disk_template in constants.DTS_INT_MIRROR:
8282 env["OLD_SECONDARY"] = target_node
8283 env["NEW_SECONDARY"] = source_node
8284 else:
8285 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8287 return env
8289 def BuildHooksNodes(self):
8290 """Build hooks nodes.
8293 instance = self._migrater.instance
8294 snodes = list(instance.secondary_nodes)
8295 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8296 return (nl, nl)
8299 class LUInstanceMove(LogicalUnit):
8300 """Move an instance by data-copying.
8303 HPATH = "instance-move"
8304 HTYPE = constants.HTYPE_INSTANCE
8307 def ExpandNames(self):
8308 self._ExpandAndLockInstance()
8309 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8310 self.op.target_node = target_node
8311 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8312 self.needed_locks[locking.LEVEL_NODE_RES] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes(primary_only=True)
8318 elif level == locking.LEVEL_NODE_RES:
8320 self.needed_locks[locking.LEVEL_NODE_RES] = \
8321 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8323 def BuildHooksEnv(self):
8326 This runs on master, primary and secondary nodes of the instance.
8330 "TARGET_NODE": self.op.target_node,
8331 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8333 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8336 def BuildHooksNodes(self):
8337 """Build hooks nodes.
8340 nl = [
8341 self.cfg.GetMasterNode(),
8342 self.instance.primary_node,
8343 self.op.target_node,
8344 ]
8345 return (nl, nl)
8347 def CheckPrereq(self):
8348 """Check prerequisites.
8350 This checks that the instance is in the cluster.
8353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8354 assert self.instance is not None, \
8355 "Cannot retrieve locked instance %s" % self.op.instance_name
8357 node = self.cfg.GetNodeInfo(self.op.target_node)
8358 assert node is not None, \
8359 "Cannot retrieve locked node %s" % self.op.target_node
8361 self.target_node = target_node = node.name
8363 if target_node == instance.primary_node:
8364 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8365 (instance.name, target_node),
8368 bep = self.cfg.GetClusterInfo().FillBE(instance)
8370 for idx, dsk in enumerate(instance.disks):
8371 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8372 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8373 " cannot copy" % idx, errors.ECODE_STATE)
8375 _CheckNodeOnline(self, target_node)
8376 _CheckNodeNotDrained(self, target_node)
8377 _CheckNodeVmCapable(self, target_node)
8378 cluster = self.cfg.GetClusterInfo()
8379 group_info = self.cfg.GetNodeGroup(node.group)
8380 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8381 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8382 ignore=self.op.ignore_ipolicy)
8384 if instance.admin_state == constants.ADMINST_UP:
8385 # check memory requirements on the secondary node
8386 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8387 instance.name, bep[constants.BE_MAXMEM],
8388 instance.hypervisor)
8389 else:
8390 self.LogInfo("Not checking memory on the secondary node as"
8391 " instance will not be started")
8393 # check bridge existence
8394 _CheckInstanceBridgesExist(self, instance, node=target_node)
8396 def Exec(self, feedback_fn):
8397 """Move an instance.
8399 The move is done by shutting it down on its present node, copying
8400 the data over (slow) and starting it on the new node.
8403 instance = self.instance
8405 source_node = instance.primary_node
8406 target_node = self.target_node
8408 self.LogInfo("Shutting down instance %s on source node %s",
8409 instance.name, source_node)
8411 assert (self.owned_locks(locking.LEVEL_NODE) ==
8412 self.owned_locks(locking.LEVEL_NODE_RES))
8414 result = self.rpc.call_instance_shutdown(source_node, instance,
8415 self.op.shutdown_timeout)
8416 msg = result.fail_msg
8417 if msg:
8418 if self.op.ignore_consistency:
8419 self.LogWarning("Could not shutdown instance %s on node %s."
8420 " Proceeding anyway. Please make sure node"
8421 " %s is down. Error details: %s",
8422 instance.name, source_node, source_node, msg)
8423 else:
8424 raise errors.OpExecError("Could not shutdown instance %s on"
8425 " node %s: %s" %
8426 (instance.name, source_node, msg))
8428 # create the target disks
8429 try:
8430 _CreateDisks(self, instance, target_node=target_node)
8431 except errors.OpExecError:
8432 self.LogWarning("Device creation failed, reverting...")
8433 try:
8434 _RemoveDisks(self, instance, target_node=target_node)
8435 finally:
8436 self.cfg.ReleaseDRBDMinors(instance.name)
8437 raise
8439 cluster_name = self.cfg.GetClusterInfo().cluster_name
8441 errs = []
8442 # activate, get path, copy the data over
8443 for idx, disk in enumerate(instance.disks):
8444 self.LogInfo("Copying data for disk %d", idx)
8445 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8446 instance.name, True, idx)
8447 if result.fail_msg:
8448 self.LogWarning("Can't assemble newly created disk %d: %s",
8449 idx, result.fail_msg)
8450 errs.append(result.fail_msg)
8451 break
8452 dev_path = result.payload
8453 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8454 target_node, dev_path,
8455 cluster_name)
8456 if result.fail_msg:
8457 self.LogWarning("Can't copy data over for disk %d: %s",
8458 idx, result.fail_msg)
8459 errs.append(result.fail_msg)
8460 break
8462 if errs:
8463 self.LogWarning("Some disks failed to copy, aborting")
8464 try:
8465 _RemoveDisks(self, instance, target_node=target_node)
8466 finally:
8467 self.cfg.ReleaseDRBDMinors(instance.name)
8468 raise errors.OpExecError("Errors during disk copy: %s" %
8471 instance.primary_node = target_node
8472 self.cfg.Update(instance, feedback_fn)
8474 self.LogInfo("Removing the disks on the original node")
8475 _RemoveDisks(self, instance, target_node=source_node)
8477 # Only start the instance if it's marked as up
8478 if instance.admin_state == constants.ADMINST_UP:
8479 self.LogInfo("Starting instance %s on node %s",
8480 instance.name, target_node)
8482 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8483 ignore_secondaries=True)
8484 if not disks_ok:
8485 _ShutdownInstanceDisks(self, instance)
8486 raise errors.OpExecError("Can't activate the instance's disks")
8488 result = self.rpc.call_instance_start(target_node,
8489 (instance, None, None), False)
8490 msg = result.fail_msg
8491 if msg:
8492 _ShutdownInstanceDisks(self, instance)
8493 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8494 (instance.name, target_node, msg))
8497 class LUNodeMigrate(LogicalUnit):
8498 """Migrate all instances from a node.
8501 HPATH = "node-migrate"
8502 HTYPE = constants.HTYPE_NODE
8505 def CheckArguments(self):
8508 def ExpandNames(self):
8509 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8511 self.share_locks = _ShareAll()
8512 self.needed_locks = {
8513 locking.LEVEL_NODE: [self.op.node_name],
8514 }
8516 def BuildHooksEnv(self):
8519 This runs on the master, the primary and all the secondaries.
8522 return {
8523 "NODE_NAME": self.op.node_name,
8524 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8525 }
8527 def BuildHooksNodes(self):
8528 """Build hooks nodes.
8531 nl = [self.cfg.GetMasterNode()]
8532 return (nl, nl)
8534 def CheckPrereq(self):
8537 def Exec(self, feedback_fn):
8538 # Prepare jobs for migration instances
8539 allow_runtime_changes = self.op.allow_runtime_changes
8541 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8544 iallocator=self.op.iallocator,
8545 target_node=self.op.target_node,
8546 allow_runtime_changes=allow_runtime_changes,
8547 ignore_ipolicy=self.op.ignore_ipolicy)]
8548 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8550 # TODO: Run iallocator in this opcode and pass correct placement options to
8551 # OpInstanceMigrate. Since other jobs can modify the cluster between
8552 # running the iallocator and the actual migration, a good consistency model
8553 # will have to be found.
8555 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8556 frozenset([self.op.node_name]))
8558 return ResultWithJobs(jobs)
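# The result is one single-opcode job per primary instance of the node, e.g.
# [[OpInstanceMigrate(instance_name="inst1", ...)],
#  [OpInstanceMigrate(instance_name="inst2", ...)]]; the submitted job IDs are
# then reported back to the caller through ResultWithJobs.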
8561 class TLMigrateInstance(Tasklet):
8562 """Tasklet class for instance migration.
8565 @ivar live: whether the migration will be done live or non-live;
8566 this variable is initialized only after CheckPrereq has run
8567 @type cleanup: boolean
8568 @ivar cleanup: Whether we are cleaning up from a failed migration
8569 @type iallocator: string
8570 @ivar iallocator: The iallocator used to determine target_node
8571 @type target_node: string
8572 @ivar target_node: If given, the target_node to reallocate the instance to
8573 @type failover: boolean
8574 @ivar failover: Whether operation results in failover or migration
8575 @type fallback: boolean
8576 @ivar fallback: Whether fallback to failover is allowed if migration not
8578 @type ignore_consistency: boolean
8579 @ivar ignore_consistency: Whether we should ignore consistency between source
8581 @type shutdown_timeout: int
8582 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8583 @type ignore_ipolicy: bool
8584 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8589 _MIGRATION_POLL_INTERVAL = 1 # seconds
8590 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8592 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8593 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8594 ignore_ipolicy):
8595 """Initializes this class.
8598 Tasklet.__init__(self, lu)
8601 self.instance_name = instance_name
8602 self.cleanup = cleanup
8603 self.live = False # will be overridden later
8604 self.failover = failover
8605 self.fallback = fallback
8606 self.ignore_consistency = ignore_consistency
8607 self.shutdown_timeout = shutdown_timeout
8608 self.ignore_ipolicy = ignore_ipolicy
8609 self.allow_runtime_changes = allow_runtime_changes
8611 def CheckPrereq(self):
8612 """Check prerequisites.
8614 This checks that the instance is in the cluster.
8617 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8618 instance = self.cfg.GetInstanceInfo(instance_name)
8619 assert instance is not None
8620 self.instance = instance
8621 cluster = self.cfg.GetClusterInfo()
8623 if (not self.cleanup and
8624 not instance.admin_state == constants.ADMINST_UP and
8625 not self.failover and self.fallback):
8626 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8627 " switching to failover")
8628 self.failover = True
8630 if instance.disk_template not in constants.DTS_MIRRORED:
8635 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8636 " %s" % (instance.disk_template, text),
8639 if instance.disk_template in constants.DTS_EXT_MIRROR:
8640 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8642 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8644 if self.lu.op.iallocator:
8645 self._RunAllocator()
8646 else:
8647 # We set self.target_node as it is required by
8648 # _ShutdownInstanceDisks
8649 self.target_node = self.lu.op.target_node
8651 # Check that the target node is correct in terms of instance policy
8652 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8653 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8654 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8656 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8657 ignore=self.ignore_ipolicy)
8659 # self.target_node is already populated, either directly or by the
8661 target_node = self.target_node
8662 if self.target_node == instance.primary_node:
8663 raise errors.OpPrereqError("Cannot migrate instance %s"
8664 " to its primary (%s)" %
8665 (instance.name, instance.primary_node),
8668 if len(self.lu.tasklets) == 1:
8669 # It is safe to release locks only when we're the only tasklet
8671 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8672 keep=[instance.primary_node, self.target_node])
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8675 else:
8676 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8678 secondary_nodes = instance.secondary_nodes
8679 if not secondary_nodes:
8680 raise errors.ConfigurationError("No secondary node but using"
8681 " %s disk template" %
8682 instance.disk_template)
8683 target_node = secondary_nodes[0]
8684 if self.lu.op.iallocator or (self.lu.op.target_node and
8685 self.lu.op.target_node != target_node):
8687 text = "failed over"
8690 raise errors.OpPrereqError("Instances with disk template %s cannot"
8691 " be %s to arbitrary nodes"
8692 " (neither an iallocator nor a target"
8693 " node can be passed)" %
8694 (instance.disk_template, text),
8696 nodeinfo = self.cfg.GetNodeInfo(target_node)
8697 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8698 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8700 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8701 ignore=self.ignore_ipolicy)
8703 i_be = cluster.FillBE(instance)
8705 # check memory requirements on the secondary node
8706 if (not self.cleanup and
8707 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8708 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8709 "migrating instance %s" %
8711 i_be[constants.BE_MINMEM],
8712 instance.hypervisor)
8713 else:
8714 self.lu.LogInfo("Not checking memory on the secondary node as"
8715 " instance will not be started")
8717 # check if failover must be forced instead of migration
8718 if (not self.cleanup and not self.failover and
8719 i_be[constants.BE_ALWAYS_FAILOVER]):
8720 self.lu.LogInfo("Instance configured to always failover; fallback"
8722 self.failover = True
8724 # check bridge existence
8725 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8727 if not self.cleanup:
8728 _CheckNodeNotDrained(self.lu, target_node)
8729 if not self.failover:
8730 result = self.rpc.call_instance_migratable(instance.primary_node,
8731 instance)
8732 if result.fail_msg and self.fallback:
8733 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8735 self.failover = True
8736 else:
8737 result.Raise("Can't migrate, please use failover",
8738 prereq=True, ecode=errors.ECODE_STATE)
8740 assert not (self.failover and self.cleanup)
8742 if not self.failover:
8743 if self.lu.op.live is not None and self.lu.op.mode is not None:
8744 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8745 " parameters are accepted",
8747 if self.lu.op.live is not None:
8748 if self.lu.op.live:
8749 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8750 else:
8751 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8752 # reset the 'live' parameter to None so that repeated
8753 # invocations of CheckPrereq do not raise an exception
8754 self.lu.op.live = None
8755 elif self.lu.op.mode is None:
8756 # read the default value from the hypervisor
8757 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8758 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8760 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8761 else:
8762 # Failover is never live
8763 self.live = False
8765 if not (self.failover or self.cleanup):
8766 remote_info = self.rpc.call_instance_info(instance.primary_node,
8767 instance.name,
8768 instance.hypervisor)
8769 remote_info.Raise("Error checking instance on node %s" %
8770 instance.primary_node)
8771 instance_running = bool(remote_info.payload)
8772 if instance_running:
8773 self.current_mem = int(remote_info.payload["memory"])
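# At the end of CheckPrereq the failover/live decision is settled: the "live"
# and "mode" opcode parameters are mutually exclusive, an explicit "live"
# value is translated into a migration mode, and when neither is given the
# hypervisor's HV_MIGRATION_MODE default decides.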
8775 def _RunAllocator(self):
8776 """Run the allocator based on input opcode.
8779 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8781 # FIXME: add a self.ignore_ipolicy option
8782 req = iallocator.IAReqRelocate(name=self.instance_name,
8783 relocate_from=[self.instance.primary_node])
8784 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8786 ial.Run(self.lu.op.iallocator)
8788 if not ial.success:
8789 raise errors.OpPrereqError("Can't compute nodes using"
8790 " iallocator '%s': %s" %
8791 (self.lu.op.iallocator, ial.info),
8793 self.target_node = ial.result[0]
8794 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8795 self.instance_name, self.lu.op.iallocator,
8796 utils.CommaJoin(ial.result))
8798 def _WaitUntilSync(self):
8799 """Poll with custom rpc for disk sync.
8801 This uses our own step-based rpc call.
8804 self.feedback_fn("* wait until resync is done")
8808 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8810 (self.instance.disks,
8813 for node, nres in result.items():
8814 nres.Raise("Cannot resync disks on node %s" % node)
8815 node_done, node_percent = nres.payload
8816 all_done = all_done and node_done
8817 if node_percent is not None:
8818 min_percent = min(min_percent, node_percent)
8820 if min_percent < 100:
8821 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8824 def _EnsureSecondary(self, node):
8825 """Demote a node to secondary.
8828 self.feedback_fn("* switching node %s to secondary mode" % node)
8830 for dev in self.instance.disks:
8831 self.cfg.SetDiskID(dev, node)
8833 result = self.rpc.call_blockdev_close(node, self.instance.name,
8834 self.instance.disks)
8835 result.Raise("Cannot change disk to secondary on node %s" % node)
8837 def _GoStandalone(self):
8838 """Disconnect from the network.
8841 self.feedback_fn("* changing into standalone mode")
8842 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8843 self.instance.disks)
8844 for node, nres in result.items():
8845 nres.Raise("Cannot disconnect disks node %s" % node)
8847 def _GoReconnect(self, multimaster):
8848 """Reconnect to the network.
8854 msg = "single-master"
8855 self.feedback_fn("* changing disks into %s mode" % msg)
8856 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8857 (self.instance.disks, self.instance),
8858 self.instance.name, multimaster)
8859 for node, nres in result.items():
8860 nres.Raise("Cannot change disks config on node %s" % node)
8862 def _ExecCleanup(self):
8863 """Try to cleanup after a failed migration.
8865 The cleanup is done by:
8866 - check that the instance is running only on one node
8867 (and update the config if needed)
8868 - change disks on its secondary node to secondary
8869 - wait until disks are fully synchronized
8870 - disconnect from the network
8871 - change disks into single-master mode
8872 - wait again until disks are fully synchronized
8875 instance = self.instance
8876 target_node = self.target_node
8877 source_node = self.source_node
8879 # check running on only one node
8880 self.feedback_fn("* checking where the instance actually runs"
8881 " (if this hangs, the hypervisor might be in"
8883 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8884 for node, result in ins_l.items():
8885 result.Raise("Can't contact node %s" % node)
8887 runningon_source = instance.name in ins_l[source_node].payload
8888 runningon_target = instance.name in ins_l[target_node].payload
8890 if runningon_source and runningon_target:
8891 raise errors.OpExecError("Instance seems to be running on two nodes,"
8892 " or the hypervisor is confused; you will have"
8893 " to ensure manually that it runs only on one"
8894 " and restart this operation")
8896 if not (runningon_source or runningon_target):
8897 raise errors.OpExecError("Instance does not seem to be running at all;"
8898 " in this case it's safer to repair by"
8899 " running 'gnt-instance stop' to ensure disk"
8900 " shutdown, and then restarting it")
8902 if runningon_target:
8903 # the migration has actually succeeded, we need to update the config
8904 self.feedback_fn("* instance running on secondary node (%s),"
8905 " updating config" % target_node)
8906 instance.primary_node = target_node
8907 self.cfg.Update(instance, self.feedback_fn)
8908 demoted_node = source_node
8910 self.feedback_fn("* instance confirmed to be running on its"
8911 " primary node (%s)" % source_node)
8912 demoted_node = target_node
8914 if instance.disk_template in constants.DTS_INT_MIRROR:
8915 self._EnsureSecondary(demoted_node)
8916 try:
8917 self._WaitUntilSync()
8918 except errors.OpExecError:
8919 # we ignore here errors, since if the device is standalone, it
8920 # won't be able to sync
8921 pass
8922 self._GoStandalone()
8923 self._GoReconnect(False)
8924 self._WaitUntilSync()
8926 self.feedback_fn("* done")
8928 def _RevertDiskStatus(self):
8929 """Try to revert the disk status after a failed migration.
8932 target_node = self.target_node
8933 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8937 self._EnsureSecondary(target_node)
8938 self._GoStandalone()
8939 self._GoReconnect(False)
8940 self._WaitUntilSync()
8941 except errors.OpExecError, err:
8942 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8943 " please try to recover the instance manually;"
8944 " error '%s'" % str(err))
8946 def _AbortMigration(self):
8947 """Call the hypervisor code to abort a started migration.
8950 instance = self.instance
8951 target_node = self.target_node
8952 source_node = self.source_node
8953 migration_info = self.migration_info
8955 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8959 abort_msg = abort_result.fail_msg
8961 logging.error("Aborting migration failed on target node %s: %s",
8962 target_node, abort_msg)
8963 # Don't raise an exception here, as we still have to try to revert the
8964 # disk status, even if this step failed.
8966 abort_result = self.rpc.call_instance_finalize_migration_src(
8967 source_node, instance, False, self.live)
8968 abort_msg = abort_result.fail_msg
8970 logging.error("Aborting migration failed on source node %s: %s",
8971 source_node, abort_msg)
8973 def _ExecMigration(self):
8974 """Migrate an instance.
8976 The migration is done by:
8977 - change the disks into dual-master mode
8978 - wait until disks are fully synchronized again
8979 - migrate the instance
8980 - change disks on the new secondary node (the old primary) to secondary
8981 - wait until disks are fully synchronized
8982 - change disks into single-master mode
8985 instance = self.instance
8986 target_node = self.target_node
8987 source_node = self.source_node
8989 # Check for hypervisor version mismatch and warn the user.
8990 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8991 None, [self.instance.hypervisor], False)
8992 for ninfo in nodeinfo.values():
8993 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8995 (_, _, (src_info, )) = nodeinfo[source_node].payload
8996 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8998 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8999 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9000 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9001 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9002 if src_version != dst_version:
9003 self.feedback_fn("* warning: hypervisor version mismatch between"
9004 " source (%s) and target (%s) node" %
9005 (src_version, dst_version))
9007 self.feedback_fn("* checking disk consistency between source and target")
9008 for (idx, dev) in enumerate(instance.disks):
9009 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9010 raise errors.OpExecError("Disk %s is degraded or not fully"
9011 " synchronized on target node,"
9012 " aborting migration" % idx)
9014 if self.current_mem > self.tgt_free_mem:
9015 if not self.allow_runtime_changes:
9016 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9017 " free memory to fit instance %s on target"
9018 " node %s (have %dMB, need %dMB)" %
9019 (instance.name, target_node,
9020 self.tgt_free_mem, self.current_mem))
9021 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9022 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9025 rpcres.Raise("Cannot modify instance runtime memory")
9027 # First get the migration information from the remote node
9028 result = self.rpc.call_migration_info(source_node, instance)
9029 msg = result.fail_msg
9031 log_err = ("Failed fetching source migration information from %s: %s" %
9033 logging.error(log_err)
9034 raise errors.OpExecError(log_err)
9036 self.migration_info = migration_info = result.payload
9038 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9039 # Then switch the disks to master/master mode
9040 self._EnsureSecondary(target_node)
9041 self._GoStandalone()
9042 self._GoReconnect(True)
9043 self._WaitUntilSync()
9045 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9046 result = self.rpc.call_accept_instance(target_node,
9049 self.nodes_ip[target_node])
9051 msg = result.fail_msg
9053 logging.error("Instance pre-migration failed, trying to revert"
9054 " disk status: %s", msg)
9055 self.feedback_fn("Pre-migration failed, aborting")
9056 self._AbortMigration()
9057 self._RevertDiskStatus()
9058 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9059 (instance.name, msg))
9061 self.feedback_fn("* migrating instance to %s" % target_node)
9062 result = self.rpc.call_instance_migrate(source_node, instance,
9063 self.nodes_ip[target_node],
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* starting memory transfer")
9076 last_feedback = time.time()
9078 result = self.rpc.call_instance_get_migration_status(source_node,
9080 msg = result.fail_msg
9081 ms = result.payload # MigrationStatus instance
9082 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9083 logging.error("Instance migration failed, trying to revert"
9084 " disk status: %s", msg)
9085 self.feedback_fn("Migration failed, aborting")
9086 self._AbortMigration()
9087 self._RevertDiskStatus()
9088 if not msg:
9089 msg = "hypervisor returned failure"
9090 raise errors.OpExecError("Could not migrate instance %s: %s" %
9091 (instance.name, msg))
9093 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9094 self.feedback_fn("* memory transfer complete")
9095 break
9097 if (utils.TimeoutExpired(last_feedback,
9098 self._MIGRATION_FEEDBACK_INTERVAL) and
9099 ms.transferred_ram is not None):
9100 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9101 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9102 last_feedback = time.time()
9104 time.sleep(self._MIGRATION_POLL_INTERVAL)
9106 result = self.rpc.call_instance_finalize_migration_src(source_node,
9110 msg = result.fail_msg
9112 logging.error("Instance migration succeeded, but finalization failed"
9113 " on the source node: %s", msg)
9114 raise errors.OpExecError("Could not finalize instance migration: %s" %
9117 instance.primary_node = target_node
9119 # distribute new instance config to the other nodes
9120 self.cfg.Update(instance, self.feedback_fn)
9122 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9126 msg = result.fail_msg
9128 logging.error("Instance migration succeeded, but finalization failed"
9129 " on the target node: %s", msg)
9130 raise errors.OpExecError("Could not finalize instance migration: %s" %
9133 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9134 self._EnsureSecondary(source_node)
9135 self._WaitUntilSync()
9136 self._GoStandalone()
9137 self._GoReconnect(False)
9138 self._WaitUntilSync()
9140 # If the instance's disk template is `rbd' or `ext' and there was a
9141 # successful migration, unmap the device from the source node.
9142 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9143 disks = _ExpandCheckDisks(instance, instance.disks)
9144 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9146 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9147 msg = result.fail_msg
9149 logging.error("Migration was successful, but couldn't unmap the"
9150 " block device %s on source node %s: %s",
9151 disk.iv_name, source_node, msg)
9152 logging.error("You need to unmap the device %s manually on %s",
9153 disk.iv_name, source_node)
9155 self.feedback_fn("* done")
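# For internally mirrored (DRBD) templates the sequence above is: switch the
# disks to dual-master mode, migrate the instance, then demote the old primary
# and reconnect in single-master mode; externally mirrored templates skip the
# DRBD reconfiguration, and for the rbd/ext templates the disks are unmapped
# from the source node afterwards.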
9157 def _ExecFailover(self):
9158 """Failover an instance.
9160 The failover is done by shutting it down on its present node and
9161 starting it on the secondary.
9164 instance = self.instance
9165 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9167 source_node = instance.primary_node
9168 target_node = self.target_node
9170 if instance.admin_state == constants.ADMINST_UP:
9171 self.feedback_fn("* checking disk consistency between source and target")
9172 for (idx, dev) in enumerate(instance.disks):
9173 # for drbd, these are drbd over lvm
9174 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9176 if primary_node.offline:
9177 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9179 (primary_node.name, idx, target_node))
9180 elif not self.ignore_consistency:
9181 raise errors.OpExecError("Disk %s is degraded on target node,"
9182 " aborting failover" % idx)
9184 self.feedback_fn("* not checking disk consistency as instance is not"
9187 self.feedback_fn("* shutting down instance on source node")
9188 logging.info("Shutting down instance %s on node %s",
9189 instance.name, source_node)
9191 result = self.rpc.call_instance_shutdown(source_node, instance,
9192 self.shutdown_timeout)
9193 msg = result.fail_msg
9194 if msg:
9195 if self.ignore_consistency or primary_node.offline:
9196 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9197 " proceeding anyway; please make sure node"
9198 " %s is down; error details: %s",
9199 instance.name, source_node, source_node, msg)
9200 else:
9201 raise errors.OpExecError("Could not shutdown instance %s on"
9202 " node %s: %s" %
9203 (instance.name, source_node, msg))
9205 self.feedback_fn("* deactivating the instance's disks on source node")
9206 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9207 raise errors.OpExecError("Can't shut down the instance's disks")
9209 instance.primary_node = target_node
9210 # distribute new instance config to the other nodes
9211 self.cfg.Update(instance, self.feedback_fn)
9213 # Only start the instance if it's marked as up
9214 if instance.admin_state == constants.ADMINST_UP:
9215 self.feedback_fn("* activating the instance's disks on target node %s" %
9217 logging.info("Starting instance %s on node %s",
9218 instance.name, target_node)
9220 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9221 ignore_secondaries=True)
9222 if not disks_ok:
9223 _ShutdownInstanceDisks(self.lu, instance)
9224 raise errors.OpExecError("Can't activate the instance's disks")
9226 self.feedback_fn("* starting the instance on the target node %s" %
9228 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9230 msg = result.fail_msg
9231 if msg:
9232 _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9234 (instance.name, target_node, msg))
9236 def Exec(self, feedback_fn):
9237 """Perform the migration.
9240 self.feedback_fn = feedback_fn
9241 self.source_node = self.instance.primary_node
9243 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9244 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9245 self.target_node = self.instance.secondary_nodes[0]
9246 # Otherwise self.target_node has been populated either
9247 # directly, or through an iallocator.
9249 self.all_nodes = [self.source_node, self.target_node]
9250 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9251 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9253 if self.failover:
9254 feedback_fn("Failover instance %s" % self.instance.name)
9255 self._ExecFailover()
9256 else:
9257 feedback_fn("Migrating instance %s" % self.instance.name)
9259 if self.cleanup:
9260 return self._ExecCleanup()
9261 else:
9262 return self._ExecMigration()
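# Dispatch summary: failover requests go through _ExecFailover (shutdown on
# the source node, start on the target), a cleanup request (self.cleanup) runs
# _ExecCleanup after a previously failed migration, and everything else is a
# normal _ExecMigration.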
9265 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9266 force_open):
9267 """Wrapper around L{_CreateBlockDevInner}.
9269 This method annotates the root device first.
9272 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9273 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9274 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9275 force_open, excl_stor)
9278 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9279 info, force_open, excl_stor):
9280 """Create a tree of block devices on a given node.
9282 If this device type has to be created on secondaries, create it and
9285 If not, just recurse to children keeping the same 'force' value.
9287 @attention: The device has to be annotated already.
9289 @param lu: the lu on whose behalf we execute
9290 @param node: the node on which to create the device
9291 @type instance: L{objects.Instance}
9292 @param instance: the instance which owns the device
9293 @type device: L{objects.Disk}
9294 @param device: the device to create
9295 @type force_create: boolean
9296 @param force_create: whether to force creation of this device; this
9297 will be changed to True whenever we find a device which has
9298 CreateOnSecondary() attribute
9299 @param info: the extra 'metadata' we should attach to the device
9300 (this will be represented as a LVM tag)
9301 @type force_open: boolean
9302 @param force_open: this parameter will be passed to the
9303 L{backend.BlockdevCreate} function where it specifies
9304 whether we run on primary or not, and it affects both
9305 the child assembly and the device's own Open() execution
9306 @type excl_stor: boolean
9307 @param excl_stor: Whether exclusive_storage is active for the node
9310 if device.CreateOnSecondary():
9314 for child in device.children:
9315 _CreateBlockDevInner(lu, node, instance, child, force_create,
9316 info, force_open, excl_stor)
9318 if not force_create:
9321 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9325 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9327 """Create a single block device on a given node.
9329 This will not recurse over children of the device, so they must be created in advance.
9332 @param lu: the lu on whose behalf we execute
9333 @param node: the node on which to create the device
9334 @type instance: L{objects.Instance}
9335 @param instance: the instance which owns the device
9336 @type device: L{objects.Disk}
9337 @param device: the device to create
9338 @param info: the extra 'metadata' we should attach to the device
9339 (this will be represented as a LVM tag)
9340 @type force_open: boolean
9341 @param force_open: this parameter will be passed to the
9342 L{backend.BlockdevCreate} function where it specifies
9343 whether we run on primary or not, and it affects both
9344 the child assembly and the device's own Open() execution
9345 @type excl_stor: boolean
9346 @param excl_stor: Whether exclusive_storage is active for the node
9349 lu.cfg.SetDiskID(device, node)
9350 result = lu.rpc.call_blockdev_create(node, device, device.size,
9351 instance.name, force_open, info,
9353 result.Raise("Can't create block device %s on"
9354 " node %s for instance %s" % (device, node, instance.name))
9355 if device.physical_id is None:
9356 device.physical_id = result.payload
9359 def _GenerateUniqueNames(lu, exts):
9360 """Generate a suitable LV name.
9362 This will generate a logical volume name for the given instance.
9367 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9368 results.append("%s%s" % (new_id, val))
9372 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9373 iv_name, p_minor, s_minor):
9374 """Generate a drbd8 device complete with its children.
9377 assert len(vgnames) == len(names) == 2
9378 port = lu.cfg.AllocatePort()
9379 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9381 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9382 logical_id=(vgnames[0], names[0]),
9384 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9385 size=constants.DRBD_META_SIZE,
9386 logical_id=(vgnames[1], names[1]),
9388 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9389 logical_id=(primary, secondary, port,
9392 children=[dev_data, dev_meta],
9393 iv_name=iv_name, params={})
9397 _DISK_TEMPLATE_NAME_PREFIX = {
9398 constants.DT_PLAIN: "",
9399 constants.DT_RBD: ".rbd",
9400 constants.DT_EXT: ".ext",
9404 _DISK_TEMPLATE_DEVICE_TYPE = {
9405 constants.DT_PLAIN: constants.LD_LV,
9406 constants.DT_FILE: constants.LD_FILE,
9407 constants.DT_SHARED_FILE: constants.LD_FILE,
9408 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9409 constants.DT_RBD: constants.LD_RBD,
9410 constants.DT_EXT: constants.LD_EXT,
9414 def _GenerateDiskTemplate(
9415 lu, template_name, instance_name, primary_node, secondary_nodes,
9416 disk_info, file_storage_dir, file_driver, base_index,
9417 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9418 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9419 """Generate the entire disk layout for a given template type.
9422 vgname = lu.cfg.GetVGName()
9423 disk_count = len(disk_info)
9426 if template_name == constants.DT_DISKLESS:
9428 elif template_name == constants.DT_DRBD8:
9429 if len(secondary_nodes) != 1:
9430 raise errors.ProgrammerError("Wrong template configuration")
9431 remote_node = secondary_nodes[0]
9432 minors = lu.cfg.AllocateDRBDMinor(
9433 [primary_node, remote_node] * len(disk_info), instance_name)
9435 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9437 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9440 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9441 for i in range(disk_count)]):
9442 names.append(lv_prefix + "_data")
9443 names.append(lv_prefix + "_meta")
9444 for idx, disk in enumerate(disk_info):
9445 disk_index = idx + base_index
9446 data_vg = disk.get(constants.IDISK_VG, vgname)
9447 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9448 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9449 disk[constants.IDISK_SIZE],
9451 names[idx * 2:idx * 2 + 2],
9452 "disk/%d" % disk_index,
9453 minors[idx * 2], minors[idx * 2 + 1])
9454 disk_dev.mode = disk[constants.IDISK_MODE]
9455 disks.append(disk_dev)
9458 raise errors.ProgrammerError("Wrong template configuration")
9460 if template_name == constants.DT_FILE:
9462 elif template_name == constants.DT_SHARED_FILE:
9463 _req_shr_file_storage()
9465 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9466 if name_prefix is None:
9469 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9470 (name_prefix, base_index + i)
9471 for i in range(disk_count)])
9473 if template_name == constants.DT_PLAIN:
9475 def logical_id_fn(idx, _, disk):
9476 vg = disk.get(constants.IDISK_VG, vgname)
9477 return (vg, names[idx])
9479 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9481 lambda _, disk_index, disk: (file_driver,
9482 "%s/disk%d" % (file_storage_dir,
9484 elif template_name == constants.DT_BLOCK:
9486 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9487 disk[constants.IDISK_ADOPT])
9488 elif template_name == constants.DT_RBD:
9489 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9490 elif template_name == constants.DT_EXT:
9491 def logical_id_fn(idx, _, disk):
9492 provider = disk.get(constants.IDISK_PROVIDER, None)
9493 if provider is None:
9494 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9495 " not found" % (constants.DT_EXT,
9496 constants.IDISK_PROVIDER))
9497 return (provider, names[idx])
9499 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9501 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9503 for idx, disk in enumerate(disk_info):
9505 # Only for the Ext template add disk_info to params
9506 if template_name == constants.DT_EXT:
9507 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9509 if key not in constants.IDISK_PARAMS:
9510 params[key] = disk[key]
9511 disk_index = idx + base_index
9512 size = disk[constants.IDISK_SIZE]
9513 feedback_fn("* disk %s, size %s" %
9514 (disk_index, utils.FormatUnit(size, "h")))
9515 disks.append(objects.Disk(dev_type=dev_type, size=size,
9516 logical_id=logical_id_fn(idx, disk_index, disk),
9517 iv_name="disk/%d" % disk_index,
9518 mode=disk[constants.IDISK_MODE],
9524 def _GetInstanceInfoText(instance):
9525 Compute the text that should be added to the disk's metadata.
9528 return "originstname+%s" % instance.name
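# For example, an instance named "inst1.example.com" (a hypothetical name)
# gets the tag "originstname+inst1.example.com" attached to its disks.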
9531 def _CalcEta(time_taken, written, total_size):
9532 """Calculates the ETA based on size written and total size.
9534 @param time_taken: The time taken so far
9535 @param written: amount written so far
9536 @param total_size: The total size of data to be written
9537 @return: The remaining time in seconds
9540 avg_time = time_taken / float(written)
9541 return (total_size - written) * avg_time
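# Worked example (hypothetical numbers): if 512 MiB out of 2048 MiB were
# written in 30 seconds, _CalcEta(30, 512, 2048) returns
# (2048 - 512) * (30 / 512.0), i.e. 90.0 seconds remaining.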
9544 def _WipeDisks(lu, instance, disks=None):
9545 """Wipes instance disks.
9547 @type lu: L{LogicalUnit}
9548 @param lu: the logical unit on whose behalf we execute
9549 @type instance: L{objects.Instance}
9550 @param instance: the instance whose disks we should wipe
9551 @return: the success of the wipe
9554 node = instance.primary_node
9557 disks = [(idx, disk, 0)
9558 for (idx, disk) in enumerate(instance.disks)]
9560 for (_, device, _) in disks:
9561 lu.cfg.SetDiskID(device, node)
9563 logging.info("Pausing synchronization of disks of instance '%s'",
9565 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9566 (map(compat.snd, disks),
9569 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9571 for idx, success in enumerate(result.payload):
9573 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9574 " failed", idx, instance.name)
9577 for (idx, device, offset) in disks:
9578 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9579 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9581 int(min(constants.MAX_WIPE_CHUNK,
9582 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
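# Worked example (assuming MAX_WIPE_CHUNK is 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT is 10; these values are only an assumption about
# constants.py): a 5120 MiB disk is wiped in 512 MiB chunks, while a
# 51200 MiB disk is capped at 1024 MiB chunks.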
9586 start_time = time.time()
9591 info_text = (" (from %s to %s)" %
9592 (utils.FormatUnit(offset, "h"),
9593 utils.FormatUnit(size, "h")))
9595 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9597 logging.info("Wiping disk %d for instance %s on node %s using"
9598 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9600 while offset < size:
9601 wipe_size = min(wipe_chunk_size, size - offset)
9603 logging.debug("Wiping disk %d, offset %s, chunk %s",
9604 idx, offset, wipe_size)
9606 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9608 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9609 (idx, offset, wipe_size))
9613 if now - last_output >= 60:
9614 eta = _CalcEta(now - start_time, offset, size)
9615 lu.LogInfo(" - done: %.1f%% ETA: %s",
9616 offset / float(size) * 100, utils.FormatSeconds(eta))
9619 logging.info("Resuming synchronization of disks for instance '%s'",
9622 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9623 (map(compat.snd, disks),
9628 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9629 node, result.fail_msg)
9631 for idx, success in enumerate(result.payload):
9633 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9634 " failed", idx, instance.name)
9637 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9638 """Create all disks for an instance.
9640 This abstracts away some work from AddInstance.
9642 @type lu: L{LogicalUnit}
9643 @param lu: the logical unit on whose behalf we execute
9644 @type instance: L{objects.Instance}
9645 @param instance: the instance whose disks we should create
9647 @param to_skip: list of indices to skip
9648 @type target_node: string
9649 @param target_node: if passed, overrides the target node for creation
9651 @return: the success of the creation
9654 info = _GetInstanceInfoText(instance)
9655 if target_node is None:
9656 pnode = instance.primary_node
9657 all_nodes = instance.all_nodes
9662 if instance.disk_template in constants.DTS_FILEBASED:
9663 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9664 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9666 result.Raise("Failed to create directory '%s' on"
9667 " node %s" % (file_storage_dir, pnode))
9669 # Note: this needs to be kept in sync with adding of disks in
9670 # LUInstanceSetParams
9671 for idx, device in enumerate(instance.disks):
9672 if to_skip and idx in to_skip:
9674 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9676 for node in all_nodes:
9677 f_create = node == pnode
9678 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9681 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9682 """Remove all disks for an instance.
9684 This abstracts away some work from `AddInstance()` and
9685 `RemoveInstance()`. Note that in case some of the devices couldn't
9686 be removed, the removal will continue with the other ones (compare
9687 with `_CreateDisks()`).
9689 @type lu: L{LogicalUnit}
9690 @param lu: the logical unit on whose behalf we execute
9691 @type instance: L{objects.Instance}
9692 @param instance: the instance whose disks we should remove
9693 @type target_node: string
9694 @param target_node: used to override the node on which to remove the disks
9696 @return: the success of the removal
9699 logging.info("Removing block devices for instance %s", instance.name)
9702 ports_to_release = set()
9703 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9704 for (idx, device) in enumerate(anno_disks):
9706 edata = [(target_node, device)]
9708 edata = device.ComputeNodeTree(instance.primary_node)
9709 for node, disk in edata:
9710 lu.cfg.SetDiskID(disk, node)
9711 result = lu.rpc.call_blockdev_remove(node, disk)
9713 lu.LogWarning("Could not remove disk %s on node %s,"
9714 " continuing anyway: %s", idx, node, result.fail_msg)
9715 if not (result.offline and node != instance.primary_node):
9718 # if this is a DRBD disk, return its port to the pool
9719 if device.dev_type in constants.LDS_DRBD:
9720 ports_to_release.add(device.logical_id[2])
9722 if all_result or ignore_failures:
9723 for port in ports_to_release:
9724 lu.cfg.AddTcpUdpPort(port)
9726 if instance.disk_template in constants.DTS_FILEBASED:
9727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9731 tgt = instance.primary_node
9732 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9734 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9735 file_storage_dir, instance.primary_node, result.fail_msg)
9741 def _ComputeDiskSizePerVG(disk_template, disks):
9742 """Compute disk size requirements in the volume group
9745 def _compute(disks, payload):
9746 """Universal algorithm.
9751 vgs[disk[constants.IDISK_VG]] = \
9752 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9756 # Required free disk space as a function of disk and swap space
9758 constants.DT_DISKLESS: {},
9759 constants.DT_PLAIN: _compute(disks, 0),
9760 # 128 MB are added for drbd metadata for each disk
9761 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9762 constants.DT_FILE: {},
9763 constants.DT_SHARED_FILE: {},
9766 if disk_template not in req_size_dict:
9767 raise errors.ProgrammerError("Disk template '%s' size requirement"
9768 " is unknown" % disk_template)
9770 return req_size_dict[disk_template]
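# Illustrative result (hypothetical input, assuming DRBD_META_SIZE is 128 MiB):
# for disks [{vg: "xenvg", size: 1024}, {vg: "ssdvg", size: 2048}] this returns
#   DT_PLAIN -> {"xenvg": 1024, "ssdvg": 2048}
#   DT_DRBD8 -> {"xenvg": 1152, "ssdvg": 2176}   (metadata added per disk)
# and an empty dict for diskless and file-based templates.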
9773 def _FilterVmNodes(lu, nodenames):
9774 """Filters out non-vm_capable nodes from a list.
9776 @type lu: L{LogicalUnit}
9777 @param lu: the logical unit for which we check
9778 @type nodenames: list
9779 @param nodenames: the list of nodes on which we should check
9781 @return: the list of vm-capable nodes
9784 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9785 return [name for name in nodenames if name not in vm_nodes]
9788 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9789 """Hypervisor parameter validation.
9791 This function abstracts the hypervisor parameter validation to be
9792 used in both instance create and instance modify.
9794 @type lu: L{LogicalUnit}
9795 @param lu: the logical unit for which we check
9796 @type nodenames: list
9797 @param nodenames: the list of nodes on which we should check
9798 @type hvname: string
9799 @param hvname: the name of the hypervisor we should use
9800 @type hvparams: dict
9801 @param hvparams: the parameters which we need to check
9802 @raise errors.OpPrereqError: if the parameters are not valid
9805 nodenames = _FilterVmNodes(lu, nodenames)
9807 cluster = lu.cfg.GetClusterInfo()
9808 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9810 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9811 for node in nodenames:
9815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9818 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9819 """OS parameters validation.
9821 @type lu: L{LogicalUnit}
9822 @param lu: the logical unit for which we check
9823 @type required: boolean
9824 @param required: whether the validation should fail if the OS is not found
9826 @type nodenames: list
9827 @param nodenames: the list of nodes on which we should check
9828 @type osname: string
9829 @param osname: the name of the OS we should use
9830 @type osparams: dict
9831 @param osparams: the parameters which we need to check
9832 @raise errors.OpPrereqError: if the parameters are not valid
9835 nodenames = _FilterVmNodes(lu, nodenames)
9836 result = lu.rpc.call_os_validate(nodenames, required, osname,
9837 [constants.OS_VALIDATE_PARAMETERS],
9839 for node, nres in result.items():
9840 # we don't check for offline cases since this should be run only
9841 # against the master node and/or an instance's nodes
9842 nres.Raise("OS Parameters validation failed on node %s" % node)
9843 if not nres.payload:
9844 lu.LogInfo("OS %s not found on node %s, validation skipped",
9848 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9849 """Wrapper around IAReqInstanceAlloc.
9851 @param op: The instance opcode
9852 @param disks: The computed disks
9853 @param nics: The computed nics
9854 @param beparams: The fully filled beparams
9855 @param node_whitelist: List of nodes which should appear as online to the
9856 allocator (unless the node is already marked offline)
9858 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9861 spindle_use = beparams[constants.BE_SPINDLE_USE]
9862 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9863 disk_template=op.disk_template,
9866 vcpus=beparams[constants.BE_VCPUS],
9867 memory=beparams[constants.BE_MAXMEM],
9868 spindle_use=spindle_use,
9870 nics=[n.ToDict() for n in nics],
9871 hypervisor=op.hypervisor,
9872 node_whitelist=node_whitelist)
9875 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9876 """Computes the nics.
9878 @param op: The instance opcode
9879 @param cluster: Cluster configuration object
9880 @param default_ip: The default ip to assign
9881 @param cfg: An instance of the configuration object
9882 @param ec_id: Execution context ID
9884 @returns: The built up NIC objects
9889 nic_mode_req = nic.get(constants.INIC_MODE, None)
9890 nic_mode = nic_mode_req
9891 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9892 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9894 net = nic.get(constants.INIC_NETWORK, None)
9895 link = nic.get(constants.NIC_LINK, None)
9896 ip = nic.get(constants.INIC_IP, None)
9898 if net is None or net.lower() == constants.VALUE_NONE:
9901 if nic_mode_req is not None or link is not None:
9902 raise errors.OpPrereqError("If network is given, no mode or link"
9903 " is allowed to be passed",
9906 # ip validity checks
9907 if ip is None or ip.lower() == constants.VALUE_NONE:
9909 elif ip.lower() == constants.VALUE_AUTO:
9910 if not op.name_check:
9911 raise errors.OpPrereqError("IP address set to auto but name checks"
9912 " have been skipped",
9916 # We defer pool operations until later, so that the iallocator has
9917 # filled in the instance's node(s)
9918 if ip.lower() == constants.NIC_IP_POOL:
9920 raise errors.OpPrereqError("if ip=pool, parameter network"
9921 " must be passed too",
9924 elif not netutils.IPAddress.IsValid(ip):
9925 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9930 # TODO: check the ip address for uniqueness
9931 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9932 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9935 # MAC address verification
9936 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9937 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9938 mac = utils.NormalizeAndValidateMac(mac)
9941 # TODO: We need to factor this out
9942 cfg.ReserveMAC(mac, ec_id)
9943 except errors.ReservationError:
9944 raise errors.OpPrereqError("MAC address %s already in use"
9945 " in cluster" % mac,
9946 errors.ECODE_NOTUNIQUE)
9948 # Build nic parameters
9951 nicparams[constants.NIC_MODE] = nic_mode
9953 nicparams[constants.NIC_LINK] = link
9955 check_params = cluster.SimpleFillNIC(nicparams)
9956 objects.NIC.CheckParameterSyntax(check_params)
9957 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9958 network=net, nicparams=nicparams))
9963 def _ComputeDisks(op, default_vg):
9964 """Computes the instance disks.
9966 @param op: The instance opcode
9967 @param default_vg: The default_vg to assume
9969 @return: The computed disks
9973 for disk in op.disks:
9974 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9975 if mode not in constants.DISK_ACCESS_SET:
9976 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9977 mode, errors.ECODE_INVAL)
9978 size = disk.get(constants.IDISK_SIZE, None)
9980 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9983 except (TypeError, ValueError):
9984 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9987 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9988 if ext_provider and op.disk_template != constants.DT_EXT:
9989 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9990 " disk template, not %s" %
9991 (constants.IDISK_PROVIDER, constants.DT_EXT,
9992 op.disk_template), errors.ECODE_INVAL)
9994 data_vg = disk.get(constants.IDISK_VG, default_vg)
9996 constants.IDISK_SIZE: size,
9997 constants.IDISK_MODE: mode,
9998 constants.IDISK_VG: data_vg,
10001 if constants.IDISK_METAVG in disk:
10002 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10003 if constants.IDISK_ADOPT in disk:
10004 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10006 # For extstorage, demand the `provider' option and add any
10007 # additional parameters (ext-params) to the dict
10008 if op.disk_template == constants.DT_EXT:
10010 new_disk[constants.IDISK_PROVIDER] = ext_provider
10012 if key not in constants.IDISK_PARAMS:
10013 new_disk[key] = disk[key]
10015 raise errors.OpPrereqError("Missing provider for template '%s'" %
10016 constants.DT_EXT, errors.ECODE_INVAL)
10018 disks.append(new_disk)
10023 def _ComputeFullBeParams(op, cluster):
10024 """Computes the full beparams.
10026 @param op: The instance opcode
10027 @param cluster: The cluster config object
10029 @return: The fully filled beparams
10032 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10033 for param, value in op.beparams.iteritems():
10034 if value == constants.VALUE_AUTO:
10035 op.beparams[param] = default_beparams[param]
10036 objects.UpgradeBeParams(op.beparams)
10037 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10038 return cluster.SimpleFillBE(op.beparams)
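# Sketch of the behaviour (hypothetical values): if op.beparams contains
# {constants.BE_VCPUS: constants.VALUE_AUTO} and the cluster default is 1 VCPU,
# the "auto" entry is replaced by 1 before the dict is upgraded, type-checked
# and merged with the remaining cluster defaults via SimpleFillBE.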
10041 def _CheckOpportunisticLocking(op):
10042 """Generate error if opportunistic locking is not possible.
10045 if op.opportunistic_locking and not op.iallocator:
10046 raise errors.OpPrereqError("Opportunistic locking is only available in"
10047 " combination with an instance allocator",
10048 errors.ECODE_INVAL)
10051 class LUInstanceCreate(LogicalUnit):
10052 """Create an instance.
10055 HPATH = "instance-add"
10056 HTYPE = constants.HTYPE_INSTANCE
10059 def CheckArguments(self):
10060 """Check arguments.
10063 # do not require name_check to ease forward/backward compatibility
10065 if self.op.no_install and self.op.start:
10066 self.LogInfo("No-installation mode selected, disabling startup")
10067 self.op.start = False
10068 # validate/normalize the instance name
10069 self.op.instance_name = \
10070 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10072 if self.op.ip_check and not self.op.name_check:
10073 # TODO: make the ip check more flexible and not depend on the name check
10074 raise errors.OpPrereqError("Cannot do IP address check without a name"
10075 " check", errors.ECODE_INVAL)
10077 # check nics' parameter names
10078 for nic in self.op.nics:
10079 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10081 # check disks. parameter names and consistent adopt/no-adopt strategy
10082 has_adopt = has_no_adopt = False
10083 for disk in self.op.disks:
10084 if self.op.disk_template != constants.DT_EXT:
10085 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10086 if constants.IDISK_ADOPT in disk:
10089 has_no_adopt = True
10090 if has_adopt and has_no_adopt:
10091 raise errors.OpPrereqError("Either all disks are adopted or none is",
10092 errors.ECODE_INVAL)
10094 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10095 raise errors.OpPrereqError("Disk adoption is not supported for the"
10096 " '%s' disk template" %
10097 self.op.disk_template,
10098 errors.ECODE_INVAL)
10099 if self.op.iallocator is not None:
10100 raise errors.OpPrereqError("Disk adoption not allowed with an"
10101 " iallocator script", errors.ECODE_INVAL)
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 raise errors.OpPrereqError("Disk adoption not allowed for"
10104 " instance import", errors.ECODE_INVAL)
10106 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10107 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10108 " but no 'adopt' parameter given" %
10109 self.op.disk_template,
10110 errors.ECODE_INVAL)
10112 self.adopt_disks = has_adopt
10114 # instance name verification
10115 if self.op.name_check:
10116 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10117 self.op.instance_name = self.hostname1.name
10118 # used in CheckPrereq for ip ping check
10119 self.check_ip = self.hostname1.ip
10121 self.check_ip = None
10123 # file storage checks
10124 if (self.op.file_driver and
10125 not self.op.file_driver in constants.FILE_DRIVER):
10126 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10127 self.op.file_driver, errors.ECODE_INVAL)
10129 if self.op.disk_template == constants.DT_FILE:
10130 opcodes.RequireFileStorage()
10131 elif self.op.disk_template == constants.DT_SHARED_FILE:
10132 opcodes.RequireSharedFileStorage()
10134 ### Node/iallocator related checks
10135 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10137 if self.op.pnode is not None:
10138 if self.op.disk_template in constants.DTS_INT_MIRROR:
10139 if self.op.snode is None:
10140 raise errors.OpPrereqError("The networked disk templates need"
10141 " a mirror node", errors.ECODE_INVAL)
10142 elif self.op.snode:
10143 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10145 self.op.snode = None
10147 _CheckOpportunisticLocking(self.op)
10149 self._cds = _GetClusterDomainSecret()
10151 if self.op.mode == constants.INSTANCE_IMPORT:
10152 # On import force_variant must be True, because if we forced it at
10153 # initial install, our only chance when importing it back is that it works again
10155 self.op.force_variant = True
10157 if self.op.no_install:
10158 self.LogInfo("No-installation mode has no effect during import")
10160 elif self.op.mode == constants.INSTANCE_CREATE:
10161 if self.op.os_type is None:
10162 raise errors.OpPrereqError("No guest OS specified",
10163 errors.ECODE_INVAL)
10164 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10165 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10166 " installation" % self.op.os_type,
10167 errors.ECODE_STATE)
10168 if self.op.disk_template is None:
10169 raise errors.OpPrereqError("No disk template specified",
10170 errors.ECODE_INVAL)
10172 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10173 # Check handshake to ensure both clusters have the same domain secret
10174 src_handshake = self.op.source_handshake
10175 if not src_handshake:
10176 raise errors.OpPrereqError("Missing source handshake",
10177 errors.ECODE_INVAL)
10179 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10182 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10183 errors.ECODE_INVAL)
10185 # Load and check source CA
10186 self.source_x509_ca_pem = self.op.source_x509_ca
10187 if not self.source_x509_ca_pem:
10188 raise errors.OpPrereqError("Missing source X509 CA",
10189 errors.ECODE_INVAL)
10192 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10194 except OpenSSL.crypto.Error, err:
10195 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10196 (err, ), errors.ECODE_INVAL)
10198 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10199 if errcode is not None:
10200 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10201 errors.ECODE_INVAL)
10203 self.source_x509_ca = cert
10205 src_instance_name = self.op.source_instance_name
10206 if not src_instance_name:
10207 raise errors.OpPrereqError("Missing source instance name",
10208 errors.ECODE_INVAL)
10210 self.source_instance_name = \
10211 netutils.GetHostname(name=src_instance_name).name
10214 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10215 self.op.mode, errors.ECODE_INVAL)
10217 def ExpandNames(self):
10218 """ExpandNames for CreateInstance.
10220 Figure out the right locks for instance creation.
10223 self.needed_locks = {}
10225 instance_name = self.op.instance_name
10226 # this is just a preventive check, but someone might still add this
10227 # instance in the meantime, and creation will fail at lock-add time
10228 if instance_name in self.cfg.GetInstanceList():
10229 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10230 instance_name, errors.ECODE_EXISTS)
10232 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10234 if self.op.iallocator:
10235 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10236 # specifying a group on instance creation and then selecting nodes from
10238 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10239 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10241 if self.op.opportunistic_locking:
10242 self.opportunistic_locks[locking.LEVEL_NODE] = True
10243 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10245 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10246 nodelist = [self.op.pnode]
10247 if self.op.snode is not None:
10248 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10249 nodelist.append(self.op.snode)
10250 self.needed_locks[locking.LEVEL_NODE] = nodelist
10252 # in case of import lock the source node too
10253 if self.op.mode == constants.INSTANCE_IMPORT:
10254 src_node = self.op.src_node
10255 src_path = self.op.src_path
10257 if src_path is None:
10258 self.op.src_path = src_path = self.op.instance_name
10260 if src_node is None:
10261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10262 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10263 self.op.src_node = None
10264 if os.path.isabs(src_path):
10265 raise errors.OpPrereqError("Importing an instance from a path"
10266 " requires a source node option",
10267 errors.ECODE_INVAL)
10269 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10270 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10271 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10272 if not os.path.isabs(src_path):
10273 self.op.src_path = src_path = \
10274 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10276 self.needed_locks[locking.LEVEL_NODE_RES] = \
10277 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10279 def _RunAllocator(self):
10280 """Run the allocator based on input opcode.
10283 if self.op.opportunistic_locking:
10284 # Only consider nodes for which a lock is held
10285 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10287 node_whitelist = None
10289 #TODO Export network to iallocator so that it chooses a pnode
10290 # in a nodegroup that has the desired network connected to
10291 req = _CreateInstanceAllocRequest(self.op, self.disks,
10292 self.nics, self.be_full,
10294 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10296 ial.Run(self.op.iallocator)
10298 if not ial.success:
10299 # When opportunistic locks are used only a temporary failure is generated
10300 if self.op.opportunistic_locking:
10301 ecode = errors.ECODE_TEMP_NORES
10303 ecode = errors.ECODE_NORES
10305 raise errors.OpPrereqError("Can't compute nodes using"
10306 " iallocator '%s': %s" %
10307 (self.op.iallocator, ial.info),
10310 self.op.pnode = ial.result[0]
10311 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10312 self.op.instance_name, self.op.iallocator,
10313 utils.CommaJoin(ial.result))
10315 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10317 if req.RequiredNodes() == 2:
10318 self.op.snode = ial.result[1]
10320 def BuildHooksEnv(self):
10321 """Build hooks env.
10323 This runs on master, primary and secondary nodes of the instance.
10327 "ADD_MODE": self.op.mode,
10329 if self.op.mode == constants.INSTANCE_IMPORT:
10330 env["SRC_NODE"] = self.op.src_node
10331 env["SRC_PATH"] = self.op.src_path
10332 env["SRC_IMAGES"] = self.src_images
10334 env.update(_BuildInstanceHookEnv(
10335 name=self.op.instance_name,
10336 primary_node=self.op.pnode,
10337 secondary_nodes=self.secondaries,
10338 status=self.op.start,
10339 os_type=self.op.os_type,
10340 minmem=self.be_full[constants.BE_MINMEM],
10341 maxmem=self.be_full[constants.BE_MAXMEM],
10342 vcpus=self.be_full[constants.BE_VCPUS],
10343 nics=_NICListToTuple(self, self.nics),
10344 disk_template=self.op.disk_template,
10345 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10346 for d in self.disks],
10349 hypervisor_name=self.op.hypervisor,
10355 def BuildHooksNodes(self):
10356 """Build hooks nodes.
10359 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10362 def _ReadExportInfo(self):
10363 """Reads the export information from disk.
10365 It will override the opcode source node and path with the actual
10366 information, if these two were not specified before.
10368 @return: the export information
10371 assert self.op.mode == constants.INSTANCE_IMPORT
10373 src_node = self.op.src_node
10374 src_path = self.op.src_path
10376 if src_node is None:
10377 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10378 exp_list = self.rpc.call_export_list(locked_nodes)
10380 for node in exp_list:
10381 if exp_list[node].fail_msg:
10383 if src_path in exp_list[node].payload:
10385 self.op.src_node = src_node = node
10386 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10390 raise errors.OpPrereqError("No export found for relative path %s" %
10391 src_path, errors.ECODE_INVAL)
10393 _CheckNodeOnline(self, src_node)
10394 result = self.rpc.call_export_info(src_node, src_path)
10395 result.Raise("No export or invalid export found in dir %s" % src_path)
10397 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10398 if not export_info.has_section(constants.INISECT_EXP):
10399 raise errors.ProgrammerError("Corrupted export config",
10400 errors.ECODE_ENVIRON)
10402 ei_version = export_info.get(constants.INISECT_EXP, "version")
10403 if (int(ei_version) != constants.EXPORT_VERSION):
10404 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10405 (ei_version, constants.EXPORT_VERSION),
10406 errors.ECODE_ENVIRON)
10409 def _ReadExportParams(self, einfo):
10410 """Use export parameters as defaults.
10412 In case the opcode doesn't specify (i.e. override) some instance
10413 parameters, try to take them from the export information, if
10414 it declares them.
10417 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10419 if self.op.disk_template is None:
10420 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10421 self.op.disk_template = einfo.get(constants.INISECT_INS,
10423 if self.op.disk_template not in constants.DISK_TEMPLATES:
10424 raise errors.OpPrereqError("Disk template specified in configuration"
10425 " file is not one of the allowed values:"
10427 " ".join(constants.DISK_TEMPLATES),
10428 errors.ECODE_INVAL)
10430 raise errors.OpPrereqError("No disk template specified and the export"
10431 " is missing the disk_template information",
10432 errors.ECODE_INVAL)
10434 if not self.op.disks:
10436 # TODO: import the disk iv_name too
10437 for idx in range(constants.MAX_DISKS):
10438 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10439 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10440 disks.append({constants.IDISK_SIZE: disk_sz})
10441 self.op.disks = disks
10442 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10443 raise errors.OpPrereqError("No disk info specified and the export"
10444 " is missing the disk information",
10445 errors.ECODE_INVAL)
10447 if not self.op.nics:
10449 for idx in range(constants.MAX_NICS):
10450 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10452 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10453 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10458 self.op.nics = nics
10460 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10461 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10463 if (self.op.hypervisor is None and
10464 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10465 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10467 if einfo.has_section(constants.INISECT_HYP):
10468 # use the export parameters but do not override the ones
10469 # specified by the user
10470 for name, value in einfo.items(constants.INISECT_HYP):
10471 if name not in self.op.hvparams:
10472 self.op.hvparams[name] = value
10474 if einfo.has_section(constants.INISECT_BEP):
10475 # use the parameters, without overriding
10476 for name, value in einfo.items(constants.INISECT_BEP):
10477 if name not in self.op.beparams:
10478 self.op.beparams[name] = value
10479 # Compatibility for the old "memory" be param
10480 if name == constants.BE_MEMORY:
10481 if constants.BE_MAXMEM not in self.op.beparams:
10482 self.op.beparams[constants.BE_MAXMEM] = value
10483 if constants.BE_MINMEM not in self.op.beparams:
10484 self.op.beparams[constants.BE_MINMEM] = value
10486 # try to read the parameters old style, from the main section
10487 for name in constants.BES_PARAMETERS:
10488 if (name not in self.op.beparams and
10489 einfo.has_option(constants.INISECT_INS, name)):
10490 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10492 if einfo.has_section(constants.INISECT_OSP):
10493 # use the parameters, without overriding
10494 for name, value in einfo.items(constants.INISECT_OSP):
10495 if name not in self.op.osparams:
10496 self.op.osparams[name] = value
10498 def _RevertToDefaults(self, cluster):
10499 """Revert the instance parameters to the default values.
10503 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10504 for name in self.op.hvparams.keys():
10505 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10506 del self.op.hvparams[name]
10508 be_defs = cluster.SimpleFillBE({})
10509 for name in self.op.beparams.keys():
10510 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10511 del self.op.beparams[name]
10513 nic_defs = cluster.SimpleFillNIC({})
10514 for nic in self.op.nics:
10515 for name in constants.NICS_PARAMETERS:
10516 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10519 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10520 for name in self.op.osparams.keys():
10521 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10522 del self.op.osparams[name]
10524 def _CalculateFileStorageDir(self):
10525 """Calculate final instance file storage dir.
10528 # file storage dir calculation/check
10529 self.instance_file_storage_dir = None
10530 if self.op.disk_template in constants.DTS_FILEBASED:
10531 # build the full file storage dir path
10534 if self.op.disk_template == constants.DT_SHARED_FILE:
10535 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10537 get_fsd_fn = self.cfg.GetFileStorageDir
10539 cfg_storagedir = get_fsd_fn()
10540 if not cfg_storagedir:
10541 raise errors.OpPrereqError("Cluster file storage dir not defined",
10542 errors.ECODE_STATE)
10543 joinargs.append(cfg_storagedir)
10545 if self.op.file_storage_dir is not None:
10546 joinargs.append(self.op.file_storage_dir)
10548 joinargs.append(self.op.instance_name)
10550 # pylint: disable=W0142
10551 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
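# Illustrative result (hypothetical paths): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir set to "web" and an instance
# named "inst1.example.com", the final directory becomes
# "/srv/ganeti/file-storage/web/inst1.example.com".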
10553 def CheckPrereq(self): # pylint: disable=R0914
10554 """Check prerequisites.
10557 self._CalculateFileStorageDir()
10559 if self.op.mode == constants.INSTANCE_IMPORT:
10560 export_info = self._ReadExportInfo()
10561 self._ReadExportParams(export_info)
10562 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10564 self._old_instance_name = None
10566 if (not self.cfg.GetVGName() and
10567 self.op.disk_template not in constants.DTS_NOT_LVM):
10568 raise errors.OpPrereqError("Cluster does not support lvm-based"
10569 " instances", errors.ECODE_STATE)
10571 if (self.op.hypervisor is None or
10572 self.op.hypervisor == constants.VALUE_AUTO):
10573 self.op.hypervisor = self.cfg.GetHypervisorType()
10575 cluster = self.cfg.GetClusterInfo()
10576 enabled_hvs = cluster.enabled_hypervisors
10577 if self.op.hypervisor not in enabled_hvs:
10578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10580 (self.op.hypervisor, ",".join(enabled_hvs)),
10581 errors.ECODE_STATE)
10583 # Check tag validity
10584 for tag in self.op.tags:
10585 objects.TaggableObject.ValidateTag(tag)
10587 # check hypervisor parameter syntax (locally)
10588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10591 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10592 hv_type.CheckParameterSyntax(filled_hvp)
10593 self.hv_full = filled_hvp
10594 # check that we don't specify global parameters on an instance
10595 _CheckGlobalHvParams(self.op.hvparams)
10597 # fill and remember the beparams dict
10598 self.be_full = _ComputeFullBeParams(self.op, cluster)
10600 # build os parameters
10601 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10603 # now that hvp/bep are in final format, let's reset to defaults,
10605 if self.op.identify_defaults:
10606 self._RevertToDefaults(cluster)
10609 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10610 self.proc.GetECId())
10612 # disk checks/pre-build
10613 default_vg = self.cfg.GetVGName()
10614 self.disks = _ComputeDisks(self.op, default_vg)
10616 if self.op.mode == constants.INSTANCE_IMPORT:
10618 for idx in range(len(self.disks)):
10619 option = "disk%d_dump" % idx
10620 if export_info.has_option(constants.INISECT_INS, option):
10621 # FIXME: are the old os-es, disk sizes, etc. useful?
10622 export_name = export_info.get(constants.INISECT_INS, option)
10623 image = utils.PathJoin(self.op.src_path, export_name)
10624 disk_images.append(image)
10626 disk_images.append(False)
10628 self.src_images = disk_images
10630 if self.op.instance_name == self._old_instance_name:
10631 for idx, nic in enumerate(self.nics):
10632 if nic.mac == constants.VALUE_AUTO:
10633 nic_mac_ini = "nic%d_mac" % idx
10634 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10636 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10638 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10639 if self.op.ip_check:
10640 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10641 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10642 (self.check_ip, self.op.instance_name),
10643 errors.ECODE_NOTUNIQUE)
10645 #### mac address generation
10646 # By generating the MAC address here, both the allocator and the hooks get
10647 # the real final mac address rather than the 'auto' or 'generate' value.
10648 # There is a race condition between the generation and the instance object
10649 # creation, which means that we know the mac is valid now, but we're not
10650 # sure it will be when we actually add the instance. If things go bad
10651 # adding the instance will abort because of a duplicate mac, and the
10652 # creation job will fail.
10653 for nic in self.nics:
10654 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10655 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10659 if self.op.iallocator is not None:
10660 self._RunAllocator()
10662 # Release all unneeded node locks
10663 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10664 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10665 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10666 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10668 assert (self.owned_locks(locking.LEVEL_NODE) ==
10669 self.owned_locks(locking.LEVEL_NODE_RES)), \
10670 "Node locks differ from node resource locks"
10672 #### node related checks
10674 # check primary node
10675 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10676 assert self.pnode is not None, \
10677 "Cannot retrieve locked node %s" % self.op.pnode
10679 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10680 pnode.name, errors.ECODE_STATE)
10682 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10683 pnode.name, errors.ECODE_STATE)
10684 if not pnode.vm_capable:
10685 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10686 " '%s'" % pnode.name, errors.ECODE_STATE)
10688 self.secondaries = []
10690 # Fill in any IPs from IP pools. This must happen here, because we need to
10691 # know the nic's primary node, as specified by the iallocator
10692 for idx, nic in enumerate(self.nics):
10694 if net is not None:
10695 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10696 if netparams is None:
10697 raise errors.OpPrereqError("No netparams found for network"
10698 " %s. Probably not connected to"
10699 " node's %s nodegroup" %
10700 (net, self.pnode.name),
10701 errors.ECODE_INVAL)
10702 self.LogInfo("NIC/%d inherits netparams %s" %
10703 (idx, netparams.values()))
10704 nic.nicparams = dict(netparams)
10705 if nic.ip is not None:
10706 if nic.ip.lower() == constants.NIC_IP_POOL:
10708 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10709 except errors.ReservationError:
10710 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10711 " from the address pool" % idx,
10712 errors.ECODE_STATE)
10713 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10716 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10717 except errors.ReservationError:
10718 raise errors.OpPrereqError("IP address %s already in use"
10719 " or does not belong to network %s" %
10721 errors.ECODE_NOTUNIQUE)
10723 # net is None, ip None or given
10724 elif self.op.conflicts_check:
10725 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10727 # mirror node verification
10728 if self.op.disk_template in constants.DTS_INT_MIRROR:
10729 if self.op.snode == pnode.name:
10730 raise errors.OpPrereqError("The secondary node cannot be the"
10731 " primary node", errors.ECODE_INVAL)
10732 _CheckNodeOnline(self, self.op.snode)
10733 _CheckNodeNotDrained(self, self.op.snode)
10734 _CheckNodeVmCapable(self, self.op.snode)
10735 self.secondaries.append(self.op.snode)
10737 snode = self.cfg.GetNodeInfo(self.op.snode)
10738 if pnode.group != snode.group:
10739 self.LogWarning("The primary and secondary nodes are in two"
10740 " different node groups; the disk parameters"
10741 " from the first disk's node group will be"
10744 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10746 if self.op.disk_template in constants.DTS_INT_MIRROR:
10747 nodes.append(snode)
10748 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10749 if compat.any(map(has_es, nodes)):
10750 raise errors.OpPrereqError("Disk template %s not supported with"
10751 " exclusive storage" % self.op.disk_template,
10752 errors.ECODE_STATE)
10754 nodenames = [pnode.name] + self.secondaries
10756 # Verify instance specs
10757 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10759 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10760 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10761 constants.ISPEC_DISK_COUNT: len(self.disks),
10762 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10763 constants.ISPEC_NIC_COUNT: len(self.nics),
10764 constants.ISPEC_SPINDLE_USE: spindle_use,
10767 group_info = self.cfg.GetNodeGroup(pnode.group)
10768 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10769 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10770 if not self.op.ignore_ipolicy and res:
10771 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10772 (pnode.group, group_info.name, utils.CommaJoin(res)))
10773 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10775 if not self.adopt_disks:
10776 if self.op.disk_template == constants.DT_RBD:
10777 # _CheckRADOSFreeSpace() is just a placeholder.
10778 # Any function that checks prerequisites can be placed here.
10779 # Check if there is enough space on the RADOS cluster.
10780 _CheckRADOSFreeSpace()
10781 elif self.op.disk_template == constants.DT_EXT:
10782 # FIXME: Function that checks prereqs if needed
10785 # Check lv size requirements, if not adopting
10786 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10787 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10789 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10790 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10791 disk[constants.IDISK_ADOPT])
10792 for disk in self.disks])
10793 if len(all_lvs) != len(self.disks):
10794 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10795 errors.ECODE_INVAL)
10796 for lv_name in all_lvs:
10798 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10799 # to ReserveLV use the same syntax
10800 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10801 except errors.ReservationError:
10802 raise errors.OpPrereqError("LV named %s used by another instance" %
10803 lv_name, errors.ECODE_NOTUNIQUE)
10805 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10806 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10808 node_lvs = self.rpc.call_lv_list([pnode.name],
10809 vg_names.payload.keys())[pnode.name]
10810 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10811 node_lvs = node_lvs.payload
10813 delta = all_lvs.difference(node_lvs.keys())
10815 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10816 utils.CommaJoin(delta),
10817 errors.ECODE_INVAL)
10818 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10820 raise errors.OpPrereqError("Online logical volumes found, cannot"
10821 " adopt: %s" % utils.CommaJoin(online_lvs),
10822 errors.ECODE_STATE)
10823 # update the size of each disk based on what is found
10824 for dsk in self.disks:
10825 dsk[constants.IDISK_SIZE] = \
10826 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10827 dsk[constants.IDISK_ADOPT])][0]))
10829 elif self.op.disk_template == constants.DT_BLOCK:
10830 # Normalize and de-duplicate device paths
10831 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10832 for disk in self.disks])
10833 if len(all_disks) != len(self.disks):
10834 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10835 errors.ECODE_INVAL)
10836 baddisks = [d for d in all_disks
10837 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10839 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10840 " cannot be adopted" %
10841 (utils.CommaJoin(baddisks),
10842 constants.ADOPTABLE_BLOCKDEV_ROOT),
10843 errors.ECODE_INVAL)
10845 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10846 list(all_disks))[pnode.name]
10847 node_disks.Raise("Cannot get block device information from node %s" %
10849 node_disks = node_disks.payload
10850 delta = all_disks.difference(node_disks.keys())
10852 raise errors.OpPrereqError("Missing block device(s): %s" %
10853 utils.CommaJoin(delta),
10854 errors.ECODE_INVAL)
10855 for dsk in self.disks:
10856 dsk[constants.IDISK_SIZE] = \
10857 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10859 # Verify instance specs
10860 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10862 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10863 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10864 constants.ISPEC_DISK_COUNT: len(self.disks),
10865 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10866 for disk in self.disks],
10867 constants.ISPEC_NIC_COUNT: len(self.nics),
10868 constants.ISPEC_SPINDLE_USE: spindle_use,
10871 group_info = self.cfg.GetNodeGroup(pnode.group)
10872 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10873 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10874 if not self.op.ignore_ipolicy and res:
10875 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10876 " policy: %s") % (pnode.group,
10877 utils.CommaJoin(res)),
10878 errors.ECODE_INVAL)
10880 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10882 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10883 # check OS parameters (remotely)
10884 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10886 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10888 #TODO: _CheckExtParams (remotely)
10889 # Check parameters for extstorage
10891 # memory check on primary node
10892 #TODO(dynmem): use MINMEM for checking
10894 _CheckNodeFreeMemory(self, self.pnode.name,
10895 "creating instance %s" % self.op.instance_name,
10896 self.be_full[constants.BE_MAXMEM],
10897 self.op.hypervisor)
10899 self.dry_run_result = list(nodenames)
10901 def Exec(self, feedback_fn):
10902 """Create and add the instance to the cluster.
10905 instance = self.op.instance_name
10906 pnode_name = self.pnode.name
10908 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10909 self.owned_locks(locking.LEVEL_NODE)), \
10910 "Node locks differ from node resource locks"
10911 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10913 ht_kind = self.op.hypervisor
10914 if ht_kind in constants.HTS_REQ_PORT:
10915 network_port = self.cfg.AllocatePort()
10917 network_port = None
10919 # This is ugly, but we have a chicken-and-egg problem here
10920 # We can only take the group disk parameters, as the instance
10921 # has no disks yet (we are generating them right here).
10922 node = self.cfg.GetNodeInfo(pnode_name)
10923 nodegroup = self.cfg.GetNodeGroup(node.group)
10924 disks = _GenerateDiskTemplate(self,
10925 self.op.disk_template,
10926 instance, pnode_name,
10929 self.instance_file_storage_dir,
10930 self.op.file_driver,
10933 self.cfg.GetGroupDiskParams(nodegroup))
10935 iobj = objects.Instance(name=instance, os=self.op.os_type,
10936 primary_node=pnode_name,
10937 nics=self.nics, disks=disks,
10938 disk_template=self.op.disk_template,
10939 admin_state=constants.ADMINST_DOWN,
10940 network_port=network_port,
10941 beparams=self.op.beparams,
10942 hvparams=self.op.hvparams,
10943 hypervisor=self.op.hypervisor,
10944 osparams=self.op.osparams,
10948 for tag in self.op.tags:
10951 if self.adopt_disks:
10952 if self.op.disk_template == constants.DT_PLAIN:
10953 # rename LVs to the newly-generated names; we need to construct
10954 # 'fake' LV disks with the old data, plus the new unique_id
10955 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10957 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10958 rename_to.append(t_dsk.logical_id)
10959 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10960 self.cfg.SetDiskID(t_dsk, pnode_name)
10961 result = self.rpc.call_blockdev_rename(pnode_name,
10962 zip(tmp_disks, rename_to))
10963 result.Raise("Failed to rename adopted LVs")
10965 feedback_fn("* creating instance disks...")
10967 _CreateDisks(self, iobj)
10968 except errors.OpExecError:
10969 self.LogWarning("Device creation failed, reverting...")
10971 _RemoveDisks(self, iobj)
10973 self.cfg.ReleaseDRBDMinors(instance)
10976 feedback_fn("adding instance %s to cluster config" % instance)
10978 self.cfg.AddInstance(iobj, self.proc.GetECId())
10980 # Declare that we don't want to remove the instance lock anymore, as we've
10981 # added the instance to the config
10982 del self.remove_locks[locking.LEVEL_INSTANCE]
10984 if self.op.mode == constants.INSTANCE_IMPORT:
10985 # Release unused nodes
10986 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10988 # Release all nodes
10989 _ReleaseLocks(self, locking.LEVEL_NODE)
10992 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10993 feedback_fn("* wiping instance disks...")
10995 _WipeDisks(self, iobj)
10996 except errors.OpExecError, err:
10997 logging.exception("Wiping disks failed")
10998 self.LogWarning("Wiping instance disks failed (%s)", err)
11002 # Something is already wrong with the disks, don't do anything else
11004 elif self.op.wait_for_sync:
11005 disk_abort = not _WaitForSync(self, iobj)
11006 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11007 # make sure the disks are not degraded (still sync-ing is ok)
11008 feedback_fn("* checking mirrors status")
11009 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11014 _RemoveDisks(self, iobj)
11015 self.cfg.RemoveInstance(iobj.name)
11016 # Make sure the instance lock gets removed
11017 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11018 raise errors.OpExecError("There are some degraded disks for"
11021 # Release all node resource locks
11022 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11024 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11025 # we need to set the disk IDs to the primary node, since the
11026 # preceding code might or might not have done it, depending on
11027 # disk template and other options
11028 for disk in iobj.disks:
11029 self.cfg.SetDiskID(disk, pnode_name)
11030 if self.op.mode == constants.INSTANCE_CREATE:
11031 if not self.op.no_install:
11032 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11033 not self.op.wait_for_sync)
11035 feedback_fn("* pausing disk sync to install instance OS")
11036 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11039 for idx, success in enumerate(result.payload):
11041 logging.warn("pause-sync of instance %s for disk %d failed",
11044 feedback_fn("* running the instance OS create scripts...")
11045 # FIXME: pass debug option from opcode to backend
11047 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11048 self.op.debug_level)
11050 feedback_fn("* resuming disk sync")
11051 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11054 for idx, success in enumerate(result.payload):
11056 logging.warn("resume-sync of instance %s for disk %d failed",
11059 os_add_result.Raise("Could not add os for instance %s"
11060 " on node %s" % (instance, pnode_name))
11063 if self.op.mode == constants.INSTANCE_IMPORT:
11064 feedback_fn("* running the instance OS import scripts...")
11068 for idx, image in enumerate(self.src_images):
11072 # FIXME: pass debug option from opcode to backend
11073 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11074 constants.IEIO_FILE, (image, ),
11075 constants.IEIO_SCRIPT,
11076 (iobj.disks[idx], idx),
11078 transfers.append(dt)
11081 masterd.instance.TransferInstanceData(self, feedback_fn,
11082 self.op.src_node, pnode_name,
11083 self.pnode.secondary_ip,
11085 if not compat.all(import_result):
11086 self.LogWarning("Some disks for instance %s on node %s were not"
11087 " imported successfully" % (instance, pnode_name))
11089 rename_from = self._old_instance_name
11091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11092 feedback_fn("* preparing remote import...")
11093 # The source cluster will stop the instance before attempting to make
11094 # a connection. In some cases stopping an instance can take a long
11095 # time, hence the shutdown timeout is added to the connection timeout.
11097 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11098 self.op.source_shutdown_timeout)
11099 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11101 assert iobj.primary_node == self.pnode.name
11103 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11104 self.source_x509_ca,
11105 self._cds, timeouts)
11106 if not compat.all(disk_results):
11107 # TODO: Should the instance still be started, even if some disks
11108 # failed to import (valid for local imports, too)?
11109 self.LogWarning("Some disks for instance %s on node %s were not"
11110 " imported successfully" % (instance, pnode_name))
11112 rename_from = self.source_instance_name
11115 # also checked in the prereq part
11116 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11119 # Run rename script on newly imported instance
11120 assert iobj.name == instance
11121 feedback_fn("Running rename script for %s" % instance)
11122 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11124 self.op.debug_level)
11125 if result.fail_msg:
11126 self.LogWarning("Failed to run rename script for %s on node"
11127 " %s: %s" % (instance, pnode_name, result.fail_msg))
11129 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11132 iobj.admin_state = constants.ADMINST_UP
11133 self.cfg.Update(iobj, feedback_fn)
11134 logging.info("Starting instance %s on node %s", instance, pnode_name)
11135 feedback_fn("* starting instance...")
11136 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11138 result.Raise("Could not start instance")
11140 return list(iobj.all_nodes)
11143 class LUInstanceMultiAlloc(NoHooksLU):
11144 """Allocates multiple instances at the same time.
11149 def CheckArguments(self):
11150 """Check arguments.
11154 for inst in self.op.instances:
11155 if inst.iallocator is not None:
11156 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11157 " instance objects", errors.ECODE_INVAL)
11158 nodes.append(bool(inst.pnode))
11159 if inst.disk_template in constants.DTS_INT_MIRROR:
11160 nodes.append(bool(inst.snode))
11162 has_nodes = compat.any(nodes)
11163 if compat.all(nodes) ^ has_nodes:
11164 raise errors.OpPrereqError("There are instance objects providing"
11165 " pnode/snode while others do not",
11166 errors.ECODE_INVAL)
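# Illustrative evaluation of the check above (assumed values): with
# nodes == [True, False], compat.all(nodes) is False while has_nodes is
# True, so the XOR is true and the error is raised; [True, True] and
# [False, False] both pass, i.e. either every instance specifies its
# nodes or none of them does.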
11168 if self.op.iallocator is None:
11169 default_iallocator = self.cfg.GetDefaultIAllocator()
11170 if default_iallocator and has_nodes:
11171 self.op.iallocator = default_iallocator
11173 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11174 " given and no cluster-wide default"
11175 " iallocator found; please specify either"
11176 " an iallocator or nodes on the instances"
11177 " or set a cluster-wide default iallocator",
11178 errors.ECODE_INVAL)
11180 _CheckOpportunisticLocking(self.op)
11182 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11184 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11185 utils.CommaJoin(dups), errors.ECODE_INVAL)
11187 def ExpandNames(self):
11188 """Calculate the locks.
11191 self.share_locks = _ShareAll()
11192 self.needed_locks = {
11193 # iallocator will select nodes and even if no iallocator is used,
11194 # collisions with LUInstanceCreate should be avoided
11195 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11198 if self.op.iallocator:
11199 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11200 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11202 if self.op.opportunistic_locking:
11203 self.opportunistic_locks[locking.LEVEL_NODE] = True
11204 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11207 for inst in self.op.instances:
11208 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11209 nodeslist.append(inst.pnode)
11210 if inst.snode is not None:
11211 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11212 nodeslist.append(inst.snode)
11214 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11215 # Lock resources of instance's primary and secondary nodes (copy to
11216 # prevent accidental modification)
11217 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11219 def CheckPrereq(self):
11220 """Check prerequisite.
11223 cluster = self.cfg.GetClusterInfo()
11224 default_vg = self.cfg.GetVGName()
11225 ec_id = self.proc.GetECId()
11227 if self.op.opportunistic_locking:
11228 # Only consider nodes for which a lock is held
11229 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11231 node_whitelist = None
11233 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11234 _ComputeNics(op, cluster, None,
11236 _ComputeFullBeParams(op, cluster),
11238 for op in self.op.instances]
11240 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11241 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11243 ial.Run(self.op.iallocator)
11245 if not ial.success:
11246 raise errors.OpPrereqError("Can't compute nodes using"
11247 " iallocator '%s': %s" %
11248 (self.op.iallocator, ial.info),
11249 errors.ECODE_NORES)
11251 self.ia_result = ial.result
11253 if self.op.dry_run:
11254 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11255 constants.JOB_IDS_KEY: [],
11258 def _ConstructPartialResult(self):
11259 """Contructs the partial result.
11262 (allocatable, failed) = self.ia_result
11264 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11265 map(compat.fst, allocatable),
11266 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11269 def Exec(self, feedback_fn):
11270 """Executes the opcode.
11273 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11274 (allocatable, failed) = self.ia_result
11277 for (name, nodes) in allocatable:
11278 op = op2inst.pop(name)
11281 (op.pnode, op.snode) = nodes
11283 (op.pnode,) = nodes
11287 missing = set(op2inst.keys()) - set(failed)
11288 assert not missing, \
11289 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11291 return ResultWithJobs(jobs, **self._ConstructPartialResult())
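# Illustrative shape of the iallocator result handled above (made-up
# names): ia_result == ([("inst1", ["nodeA", "nodeB"])], ["inst2"]) means
# that "inst1" can be allocated on nodeA/nodeB while "inst2" failed; the
# partial result then maps OpInstanceMultiAlloc.ALLOCATABLE_KEY to
# ["inst1"] and OpInstanceMultiAlloc.FAILED_KEY to ["inst2"].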
11294 def _CheckRADOSFreeSpace():
11295 """Compute disk size requirements inside the RADOS cluster.
11298 # For the RADOS cluster we assume there is always enough space.
11302 class LUInstanceConsole(NoHooksLU):
11303 """Connect to an instance's console.
11305 This is somewhat special in that it returns the command line that
11306 you need to run on the master node in order to connect to the console.
11312 def ExpandNames(self):
11313 self.share_locks = _ShareAll()
11314 self._ExpandAndLockInstance()
11316 def CheckPrereq(self):
11317 """Check prerequisites.
11319 This checks that the instance is in the cluster.
11322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11323 assert self.instance is not None, \
11324 "Cannot retrieve locked instance %s" % self.op.instance_name
11325 _CheckNodeOnline(self, self.instance.primary_node)
11327 def Exec(self, feedback_fn):
11328 """Connect to the console of an instance
11331 instance = self.instance
11332 node = instance.primary_node
11334 node_insts = self.rpc.call_instance_list([node],
11335 [instance.hypervisor])[node]
11336 node_insts.Raise("Can't get node information from %s" % node)
11338 if instance.name not in node_insts.payload:
11339 if instance.admin_state == constants.ADMINST_UP:
11340 state = constants.INSTST_ERRORDOWN
11341 elif instance.admin_state == constants.ADMINST_DOWN:
11342 state = constants.INSTST_ADMINDOWN
11344 state = constants.INSTST_ADMINOFFLINE
11345 raise errors.OpExecError("Instance %s is not running (state %s)" %
11346 (instance.name, state))
11348 logging.debug("Connecting to console of %s on %s", instance.name, node)
11350 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11353 def _GetInstanceConsole(cluster, instance):
11354 """Returns console information for an instance.
11356 @type cluster: L{objects.Cluster}
11357 @type instance: L{objects.Instance}
11361 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11362 # beparams and hvparams are passed separately, to avoid editing the
11363 # instance and then saving the defaults in the instance itself.
11364 hvparams = cluster.FillHV(instance)
11365 beparams = cluster.FillBE(instance)
11366 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11368 assert console.instance == instance.name
11369 assert console.Validate()
11371 return console.ToDict()
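# Hypothetical usage sketch (names assumed, not part of this module):
# given the cluster configuration object "cfg", console information for
# one instance could be obtained with:
#   cluster = cfg.GetClusterInfo()
#   inst = cfg.GetInstanceInfo("instance1.example.com")
#   console_info = _GetInstanceConsole(cluster, inst)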
11374 class LUInstanceReplaceDisks(LogicalUnit):
11375 """Replace the disks of an instance.
11378 HPATH = "mirrors-replace"
11379 HTYPE = constants.HTYPE_INSTANCE
11382 def CheckArguments(self):
11383 """Check arguments.
11386 remote_node = self.op.remote_node
11387 ialloc = self.op.iallocator
11388 if self.op.mode == constants.REPLACE_DISK_CHG:
11389 if remote_node is None and ialloc is None:
11390 raise errors.OpPrereqError("When changing the secondary either an"
11391 " iallocator script must be used or the"
11392 " new node given", errors.ECODE_INVAL)
11394 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11396 elif remote_node is not None or ialloc is not None:
11397 # Not replacing the secondary
11398 raise errors.OpPrereqError("The iallocator and new node options can"
11399 " only be used when changing the"
11400 " secondary node", errors.ECODE_INVAL)
11402 def ExpandNames(self):
11403 self._ExpandAndLockInstance()
11405 assert locking.LEVEL_NODE not in self.needed_locks
11406 assert locking.LEVEL_NODE_RES not in self.needed_locks
11407 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11409 assert self.op.iallocator is None or self.op.remote_node is None, \
11410 "Conflicting options"
11412 if self.op.remote_node is not None:
11413 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11415 # Warning: do not remove the locking of the new secondary here
11416 # unless DRBD8.AddChildren is changed to work in parallel;
11417 # currently it doesn't since parallel invocations of
11418 # FindUnusedMinor will conflict
11419 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11422 self.needed_locks[locking.LEVEL_NODE] = []
11423 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11425 if self.op.iallocator is not None:
11426 # iallocator will select a new node in the same group
11427 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11428 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11430 self.needed_locks[locking.LEVEL_NODE_RES] = []
11432 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11433 self.op.iallocator, self.op.remote_node,
11434 self.op.disks, self.op.early_release,
11435 self.op.ignore_ipolicy)
11437 self.tasklets = [self.replacer]
11439 def DeclareLocks(self, level):
11440 if level == locking.LEVEL_NODEGROUP:
11441 assert self.op.remote_node is None
11442 assert self.op.iallocator is not None
11443 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11445 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11446 # Lock all groups used by instance optimistically; this requires going
11447 # via the node before it's locked, requiring verification later on
11448 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11449 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11451 elif level == locking.LEVEL_NODE:
11452 if self.op.iallocator is not None:
11453 assert self.op.remote_node is None
11454 assert not self.needed_locks[locking.LEVEL_NODE]
11455 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11457 # Lock member nodes of all locked groups
11458 self.needed_locks[locking.LEVEL_NODE] = \
11460 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11461 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11463 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11465 self._LockInstancesNodes()
11467 elif level == locking.LEVEL_NODE_RES:
11469 self.needed_locks[locking.LEVEL_NODE_RES] = \
11470 self.needed_locks[locking.LEVEL_NODE]
11472 def BuildHooksEnv(self):
11473 """Build hooks env.
11475 This runs on the master, the primary and all the secondaries.
11478 instance = self.replacer.instance
11480 "MODE": self.op.mode,
11481 "NEW_SECONDARY": self.op.remote_node,
11482 "OLD_SECONDARY": instance.secondary_nodes[0],
11484 env.update(_BuildInstanceHookEnvByObject(self, instance))
11487 def BuildHooksNodes(self):
11488 """Build hooks nodes.
11491 instance = self.replacer.instance
11493 self.cfg.GetMasterNode(),
11494 instance.primary_node,
11496 if self.op.remote_node is not None:
11497 nl.append(self.op.remote_node)
11500 def CheckPrereq(self):
11501 """Check prerequisites.
11504 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11505 self.op.iallocator is None)
11507 # Verify if node group locks are still correct
11508 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11510 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11512 return LogicalUnit.CheckPrereq(self)
11515 class TLReplaceDisks(Tasklet):
11516 """Replaces disks for an instance.
11518 Note: Locking is not within the scope of this class.
11521 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11522 disks, early_release, ignore_ipolicy):
11523 """Initializes this class.
11526 Tasklet.__init__(self, lu)
11529 self.instance_name = instance_name
11531 self.iallocator_name = iallocator_name
11532 self.remote_node = remote_node
11534 self.early_release = early_release
11535 self.ignore_ipolicy = ignore_ipolicy
11538 self.instance = None
11539 self.new_node = None
11540 self.target_node = None
11541 self.other_node = None
11542 self.remote_node_info = None
11543 self.node_secondary_ip = None
11546 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11547 """Compute a new secondary node using an IAllocator.
11550 req = iallocator.IAReqRelocate(name=instance_name,
11551 relocate_from=list(relocate_from))
11552 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11554 ial.Run(iallocator_name)
11556 if not ial.success:
11557 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11558 " %s" % (iallocator_name, ial.info),
11559 errors.ECODE_NORES)
11561 remote_node_name = ial.result[0]
11563 lu.LogInfo("Selected new secondary for instance '%s': %s",
11564 instance_name, remote_node_name)
11566 return remote_node_name
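# Hypothetical usage sketch (values assumed): asking an allocator script
# named "hail" for a new secondary for "inst1", excluding its current
# secondary "node2":
#   new_secondary = TLReplaceDisks._RunAllocator(lu, "hail", "inst1",
#                                                ["node2"])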
11568 def _FindFaultyDisks(self, node_name):
11569 """Wrapper for L{_FindFaultyInstanceDisks}.
11572 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11575 def _CheckDisksActivated(self, instance):
11576 """Checks if the instance disks are activated.
11578 @param instance: The instance to check disks
11579 @return: True if they are activated, False otherwise
11582 nodes = instance.all_nodes
11584 for idx, dev in enumerate(instance.disks):
11586 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11587 self.cfg.SetDiskID(dev, node)
11589 result = _BlockdevFind(self, node, dev, instance)
11593 elif result.fail_msg or not result.payload:
11598 def CheckPrereq(self):
11599 """Check prerequisites.
11601 This checks that the instance is in the cluster.
11604 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11605 assert instance is not None, \
11606 "Cannot retrieve locked instance %s" % self.instance_name
11608 if instance.disk_template != constants.DT_DRBD8:
11609 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11610 " instances", errors.ECODE_INVAL)
11612 if len(instance.secondary_nodes) != 1:
11613 raise errors.OpPrereqError("The instance has a strange layout,"
11614 " expected one secondary but found %d" %
11615 len(instance.secondary_nodes),
11616 errors.ECODE_FAULT)
11618 instance = self.instance
11619 secondary_node = instance.secondary_nodes[0]
11621 if self.iallocator_name is None:
11622 remote_node = self.remote_node
11624 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11625 instance.name, instance.secondary_nodes)
11627 if remote_node is None:
11628 self.remote_node_info = None
11630 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11631 "Remote node '%s' is not locked" % remote_node
11633 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11634 assert self.remote_node_info is not None, \
11635 "Cannot retrieve locked node %s" % remote_node
11637 if remote_node == self.instance.primary_node:
11638 raise errors.OpPrereqError("The specified node is the primary node of"
11639 " the instance", errors.ECODE_INVAL)
11641 if remote_node == secondary_node:
11642 raise errors.OpPrereqError("The specified node is already the"
11643 " secondary node of the instance",
11644 errors.ECODE_INVAL)
11646 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11647 constants.REPLACE_DISK_CHG):
11648 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11649 errors.ECODE_INVAL)
11651 if self.mode == constants.REPLACE_DISK_AUTO:
11652 if not self._CheckDisksActivated(instance):
11653 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11654 " first" % self.instance_name,
11655 errors.ECODE_STATE)
11656 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11657 faulty_secondary = self._FindFaultyDisks(secondary_node)
11659 if faulty_primary and faulty_secondary:
11660 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11661 " one node and can not be repaired"
11662 " automatically" % self.instance_name,
11663 errors.ECODE_STATE)
11666 self.disks = faulty_primary
11667 self.target_node = instance.primary_node
11668 self.other_node = secondary_node
11669 check_nodes = [self.target_node, self.other_node]
11670 elif faulty_secondary:
11671 self.disks = faulty_secondary
11672 self.target_node = secondary_node
11673 self.other_node = instance.primary_node
11674 check_nodes = [self.target_node, self.other_node]
11680 # Non-automatic modes
11681 if self.mode == constants.REPLACE_DISK_PRI:
11682 self.target_node = instance.primary_node
11683 self.other_node = secondary_node
11684 check_nodes = [self.target_node, self.other_node]
11686 elif self.mode == constants.REPLACE_DISK_SEC:
11687 self.target_node = secondary_node
11688 self.other_node = instance.primary_node
11689 check_nodes = [self.target_node, self.other_node]
11691 elif self.mode == constants.REPLACE_DISK_CHG:
11692 self.new_node = remote_node
11693 self.other_node = instance.primary_node
11694 self.target_node = secondary_node
11695 check_nodes = [self.new_node, self.other_node]
11697 _CheckNodeNotDrained(self.lu, remote_node)
11698 _CheckNodeVmCapable(self.lu, remote_node)
11700 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11701 assert old_node_info is not None
11702 if old_node_info.offline and not self.early_release:
11703 # doesn't make sense to delay the release
11704 self.early_release = True
11705 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11706 " early-release mode", secondary_node)
11709 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11712 # If not specified all disks should be replaced
11714 self.disks = range(len(self.instance.disks))
11716 # TODO: This is ugly, but right now we can't distinguish between internally
11717 # submitted opcodes and external ones. We should fix that.
11718 if self.remote_node_info:
11719 # We change the node, lets verify it still meets instance policy
11720 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11721 cluster = self.cfg.GetClusterInfo()
11722 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11724 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11725 ignore=self.ignore_ipolicy)
11727 for node in check_nodes:
11728 _CheckNodeOnline(self.lu, node)
11730 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11733 if node_name is not None)
11735 # Release unneeded node and node resource locks
11736 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11738 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11740 # Release any owned node group
11741 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11743 # Check whether disks are valid
11744 for disk_idx in self.disks:
11745 instance.FindDisk(disk_idx)
11747 # Get secondary node IP addresses
11748 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11749 in self.cfg.GetMultiNodeInfo(touched_nodes))
11751 def Exec(self, feedback_fn):
11752 """Execute disk replacement.
11754 This dispatches the disk replacement to the appropriate handler.
11758 # Verify owned locks before starting operation
11759 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11760 assert set(owned_nodes) == set(self.node_secondary_ip), \
11761 ("Incorrect node locks, owning %s, expected %s" %
11762 (owned_nodes, self.node_secondary_ip.keys()))
11763 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11764 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11765 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11767 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11768 assert list(owned_instances) == [self.instance_name], \
11769 "Instance '%s' not locked" % self.instance_name
11771 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11772 "Should not own any node group lock at this point"
11775 feedback_fn("No disks need replacement for instance '%s'" %
11776 self.instance.name)
11779 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11780 (utils.CommaJoin(self.disks), self.instance.name))
11781 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11782 feedback_fn("Current seconary node: %s" %
11783 utils.CommaJoin(self.instance.secondary_nodes))
11785 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11787 # Activate the instance disks if we're replacing them on a down instance
11789 _StartInstanceDisks(self.lu, self.instance, True)
11792 # Should we replace the secondary node?
11793 if self.new_node is not None:
11794 fn = self._ExecDrbd8Secondary
11796 fn = self._ExecDrbd8DiskOnly
11798 result = fn(feedback_fn)
11800 # Deactivate the instance disks if we're replacing them on a
11803 _SafeShutdownInstanceDisks(self.lu, self.instance)
11805 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11808 # Verify owned locks
11809 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11810 nodes = frozenset(self.node_secondary_ip)
11811 assert ((self.early_release and not owned_nodes) or
11812 (not self.early_release and not (set(owned_nodes) - nodes))), \
11813 ("Not owning the correct locks, early_release=%s, owned=%r,"
11814 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11818 def _CheckVolumeGroup(self, nodes):
11819 self.lu.LogInfo("Checking volume groups")
11821 vgname = self.cfg.GetVGName()
11823 # Make sure volume group exists on all involved nodes
11824 results = self.rpc.call_vg_list(nodes)
11826 raise errors.OpExecError("Can't list volume groups on the nodes")
11829 res = results[node]
11830 res.Raise("Error checking node %s" % node)
11831 if vgname not in res.payload:
11832 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11835 def _CheckDisksExistence(self, nodes):
11836 # Check disk existence
11837 for idx, dev in enumerate(self.instance.disks):
11838 if idx not in self.disks:
11842 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11843 self.cfg.SetDiskID(dev, node)
11845 result = _BlockdevFind(self, node, dev, self.instance)
11847 msg = result.fail_msg
11848 if msg or not result.payload:
11850 msg = "disk not found"
11851 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11854 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11855 for idx, dev in enumerate(self.instance.disks):
11856 if idx not in self.disks:
11859 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11862 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11863 on_primary, ldisk=ldisk):
11864 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11865 " replace disks for instance %s" %
11866 (node_name, self.instance.name))
11868 def _CreateNewStorage(self, node_name):
11869 """Create new storage on the primary or secondary node.
11871 This is only used for same-node replaces, not for changing the
11872 secondary node, hence we don't want to modify the existing disk.
11877 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11878 for idx, dev in enumerate(disks):
11879 if idx not in self.disks:
11882 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11884 self.cfg.SetDiskID(dev, node_name)
11886 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11887 names = _GenerateUniqueNames(self.lu, lv_names)
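# For disk index 0 the suffixes requested above are ".disk0_data" and
# ".disk0_meta"; _GenerateUniqueNames is expected to turn them into
# cluster-unique LV names (illustrative note, the exact naming scheme is
# implemented elsewhere).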
11889 (data_disk, meta_disk) = dev.children
11890 vg_data = data_disk.logical_id[0]
11891 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11892 logical_id=(vg_data, names[0]),
11893 params=data_disk.params)
11894 vg_meta = meta_disk.logical_id[0]
11895 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11896 size=constants.DRBD_META_SIZE,
11897 logical_id=(vg_meta, names[1]),
11898 params=meta_disk.params)
11900 new_lvs = [lv_data, lv_meta]
11901 old_lvs = [child.Copy() for child in dev.children]
11902 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11903 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11905 # we pass force_create=True to force the LVM creation
11906 for new_lv in new_lvs:
11907 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11908 _GetInstanceInfoText(self.instance), False,
11913 def _CheckDevices(self, node_name, iv_names):
11914 for name, (dev, _, _) in iv_names.iteritems():
11915 self.cfg.SetDiskID(dev, node_name)
11917 result = _BlockdevFind(self, node_name, dev, self.instance)
11919 msg = result.fail_msg
11920 if msg or not result.payload:
11922 msg = "disk not found"
11923 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11926 if result.payload.is_degraded:
11927 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11929 def _RemoveOldStorage(self, node_name, iv_names):
11930 for name, (_, old_lvs, _) in iv_names.iteritems():
11931 self.lu.LogInfo("Remove logical volumes for %s", name)
11934 self.cfg.SetDiskID(lv, node_name)
11936 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11938 self.lu.LogWarning("Can't remove old LV: %s", msg,
11939 hint="remove unused LVs manually")
11941 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11942 """Replace a disk on the primary or secondary for DRBD 8.
11944 The algorithm for replace is quite complicated:
11946 1. for each disk to be replaced:
11948 1. create new LVs on the target node with unique names
11949 1. detach old LVs from the drbd device
11950 1. rename old LVs to name_replaced.<time_t>
11951 1. rename new LVs to old LVs
11952 1. attach the new LVs (with the old names now) to the drbd device
11954 1. wait for sync across all devices
11956 1. for each modified disk:
11958 1. remove old LVs (which have the name name_replaced.<time_t>)
11960 Failures are not very well handled.
11965 # Step: check device activation
11966 self.lu.LogStep(1, steps_total, "Check device existence")
11967 self._CheckDisksExistence([self.other_node, self.target_node])
11968 self._CheckVolumeGroup([self.target_node, self.other_node])
11970 # Step: check other node consistency
11971 self.lu.LogStep(2, steps_total, "Check peer consistency")
11972 self._CheckDisksConsistency(self.other_node,
11973 self.other_node == self.instance.primary_node,
11976 # Step: create new storage
11977 self.lu.LogStep(3, steps_total, "Allocate new storage")
11978 iv_names = self._CreateNewStorage(self.target_node)
11980 # Step: for each lv, detach+rename*2+attach
11981 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11982 for dev, old_lvs, new_lvs in iv_names.itervalues():
11983 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11985 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11987 result.Raise("Can't detach drbd from local storage on node"
11988 " %s for device %s" % (self.target_node, dev.iv_name))
11990 #cfg.Update(instance)
11992 # ok, we created the new LVs, so now we know we have the needed
11993 # storage; as such, we proceed on the target node to rename
11994 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11995 # using the assumption that logical_id == physical_id (which in
11996 # turn is the unique_id on that node)
11998 # FIXME(iustin): use a better name for the replaced LVs
11999 temp_suffix = int(time.time())
12000 ren_fn = lambda d, suff: (d.physical_id[0],
12001 d.physical_id[1] + "_replaced-%s" % suff)
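# Illustrative example (made-up values): an old LV with physical_id
# ("xenvg", "disk0_data") and temp_suffix 1400000000 is renamed by ren_fn
# to ("xenvg", "disk0_data_replaced-1400000000"), freeing the original
# name for the newly created LV.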
12003 # Build the rename list based on what LVs exist on the node
12004 rename_old_to_new = []
12005 for to_ren in old_lvs:
12006 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12007 if not result.fail_msg and result.payload:
12009 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12011 self.lu.LogInfo("Renaming the old LVs on the target node")
12012 result = self.rpc.call_blockdev_rename(self.target_node,
12014 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12016 # Now we rename the new LVs to the old LVs
12017 self.lu.LogInfo("Renaming the new LVs on the target node")
12018 rename_new_to_old = [(new, old.physical_id)
12019 for old, new in zip(old_lvs, new_lvs)]
12020 result = self.rpc.call_blockdev_rename(self.target_node,
12022 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12024 # Intermediate steps of in-memory modifications
12025 for old, new in zip(old_lvs, new_lvs):
12026 new.logical_id = old.logical_id
12027 self.cfg.SetDiskID(new, self.target_node)
12029 # We need to modify old_lvs so that removal later removes the
12030 # right LVs, not the newly added ones; note that old_lvs is a copy
12032 for disk in old_lvs:
12033 disk.logical_id = ren_fn(disk, temp_suffix)
12034 self.cfg.SetDiskID(disk, self.target_node)
12036 # Now that the new lvs have the old name, we can add them to the device
12037 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12038 result = self.rpc.call_blockdev_addchildren(self.target_node,
12039 (dev, self.instance), new_lvs)
12040 msg = result.fail_msg
12042 for new_lv in new_lvs:
12043 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12046 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12047 hint=("cleanup manually the unused logical"
12049 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12051 cstep = itertools.count(5)
12053 if self.early_release:
12054 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12055 self._RemoveOldStorage(self.target_node, iv_names)
12056 # TODO: Check if releasing locks early still makes sense
12057 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12059 # Release all resource locks except those used by the instance
12060 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12061 keep=self.node_secondary_ip.keys())
12063 # Release all node locks while waiting for sync
12064 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12066 # TODO: Can the instance lock be downgraded here? Take the optional disk
12067 # shutdown in the caller into consideration.
12070 # This can fail as the old devices are degraded and _WaitForSync
12071 # reports a combined result over all disks, so we don't check its return value
12072 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12073 _WaitForSync(self.lu, self.instance)
12075 # Check all devices manually
12076 self._CheckDevices(self.instance.primary_node, iv_names)
12078 # Step: remove old storage
12079 if not self.early_release:
12080 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12081 self._RemoveOldStorage(self.target_node, iv_names)
12083 def _ExecDrbd8Secondary(self, feedback_fn):
12084 """Replace the secondary node for DRBD 8.
12086 The algorithm for replace is quite complicated:
12087 - for all disks of the instance:
12088 - create new LVs on the new node with same names
12089 - shutdown the drbd device on the old secondary
12090 - disconnect the drbd network on the primary
12091 - create the drbd device on the new secondary
12092 - network attach the drbd on the primary, using an artifice:
12093 the drbd code for Attach() will connect to the network if it
12094 finds a device which is connected to the good local disks but
12095 not network enabled
12096 - wait for sync across all devices
12097 - remove all disks from the old secondary
12099 Failures are not very well handled.
12104 pnode = self.instance.primary_node
12106 # Step: check device activation
12107 self.lu.LogStep(1, steps_total, "Check device existence")
12108 self._CheckDisksExistence([self.instance.primary_node])
12109 self._CheckVolumeGroup([self.instance.primary_node])
12111 # Step: check other node consistency
12112 self.lu.LogStep(2, steps_total, "Check peer consistency")
12113 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12115 # Step: create new storage
12116 self.lu.LogStep(3, steps_total, "Allocate new storage")
12117 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12118 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12119 for idx, dev in enumerate(disks):
12120 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12121 (self.new_node, idx))
12122 # we pass force_create=True to force LVM creation
12123 for new_lv in dev.children:
12124 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12125 True, _GetInstanceInfoText(self.instance), False,
12128 # Step 4: drbd minors and drbd setup changes
12129 # after this, we must manually remove the drbd minors on both the
12130 # error and the success paths
12131 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12132 minors = self.cfg.AllocateDRBDMinor([self.new_node
12133 for dev in self.instance.disks],
12134 self.instance.name)
12135 logging.debug("Allocated minors %r", minors)
12138 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12139 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12140 (self.new_node, idx))
12141 # create new devices on new_node; note that we create two IDs:
12142 # one without port, so the drbd will be activated without
12143 # networking information on the new node at this stage, and one
12144 # with network, for the later activation in step 4
12145 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12146 if self.instance.primary_node == o_node1:
12149 assert self.instance.primary_node == o_node2, "Three-node instance?"
12152 new_alone_id = (self.instance.primary_node, self.new_node, None,
12153 p_minor, new_minor, o_secret)
12154 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12155 p_minor, new_minor, o_secret)
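# Illustrative example (made-up values): with primary "node1", new
# secondary "node3", old DRBD port 11000 (o_port), the primary's minor
# p_minor and a freshly allocated new_minor of 3, the two IDs built above
# are
#   new_alone_id == ("node1", "node3", None, p_minor, 3, o_secret)
#   new_net_id   == ("node1", "node3", 11000, p_minor, 3, o_secret)
# i.e. identical except that only the networked one carries the DRBD port.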
12157 iv_names[idx] = (dev, dev.children, new_net_id)
12158 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12160 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12161 logical_id=new_alone_id,
12162 children=dev.children,
12165 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12168 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12170 _GetInstanceInfoText(self.instance), False,
12172 except errors.GenericError:
12173 self.cfg.ReleaseDRBDMinors(self.instance.name)
12176 # We have new devices, shutdown the drbd on the old secondary
12177 for idx, dev in enumerate(self.instance.disks):
12178 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12179 self.cfg.SetDiskID(dev, self.target_node)
12180 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12181 (dev, self.instance)).fail_msg
12183 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12184 "node: %s" % (idx, msg),
12185 hint=("Please cleanup this device manually as"
12186 " soon as possible"))
12188 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12189 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12190 self.instance.disks)[pnode]
12192 msg = result.fail_msg
12194 # detaches didn't succeed (unlikely)
12195 self.cfg.ReleaseDRBDMinors(self.instance.name)
12196 raise errors.OpExecError("Can't detach the disks from the network on"
12197 " old node: %s" % (msg,))
12199 # if we managed to detach at least one, we update all the disks of
12200 # the instance to point to the new secondary
12201 self.lu.LogInfo("Updating instance configuration")
12202 for dev, _, new_logical_id in iv_names.itervalues():
12203 dev.logical_id = new_logical_id
12204 self.cfg.SetDiskID(dev, self.instance.primary_node)
12206 self.cfg.Update(self.instance, feedback_fn)
12208 # Release all node locks (the configuration has been updated)
12209 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12211 # and now perform the drbd attach
12212 self.lu.LogInfo("Attaching primary drbds to new secondary"
12213 " (standalone => connected)")
12214 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12216 self.node_secondary_ip,
12217 (self.instance.disks, self.instance),
12218 self.instance.name,
12220 for to_node, to_result in result.items():
12221 msg = to_result.fail_msg
12223 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12225 hint=("please do a gnt-instance info to see the"
12226 " status of disks"))
12228 cstep = itertools.count(5)
12230 if self.early_release:
12231 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12232 self._RemoveOldStorage(self.target_node, iv_names)
12233 # TODO: Check if releasing locks early still makes sense
12234 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12236 # Release all resource locks except those used by the instance
12237 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12238 keep=self.node_secondary_ip.keys())
12240 # TODO: Can the instance lock be downgraded here? Take the optional disk
12241 # shutdown in the caller into consideration.
12244 # This can fail as the old devices are degraded and _WaitForSync
12245 # reports a combined result over all disks, so we don't check its return value
12246 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12247 _WaitForSync(self.lu, self.instance)
12249 # Check all devices manually
12250 self._CheckDevices(self.instance.primary_node, iv_names)
12252 # Step: remove old storage
12253 if not self.early_release:
12254 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12255 self._RemoveOldStorage(self.target_node, iv_names)
12258 class LURepairNodeStorage(NoHooksLU):
12259 """Repairs the volume group on a node.
12264 def CheckArguments(self):
12265 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12267 storage_type = self.op.storage_type
12269 if (constants.SO_FIX_CONSISTENCY not in
12270 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12271 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12272 " repaired" % storage_type,
12273 errors.ECODE_INVAL)
12275 def ExpandNames(self):
12276 self.needed_locks = {
12277 locking.LEVEL_NODE: [self.op.node_name],
12280 def _CheckFaultyDisks(self, instance, node_name):
12281 """Ensure faulty disks abort the opcode or at least warn."""
12283 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12285 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12286 " node '%s'" % (instance.name, node_name),
12287 errors.ECODE_STATE)
12288 except errors.OpPrereqError, err:
12289 if self.op.ignore_consistency:
12290 self.LogWarning(str(err.args[0]))
12294 def CheckPrereq(self):
12295 """Check prerequisites.
12298 # Check whether any instance on this node has faulty disks
12299 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12300 if inst.admin_state != constants.ADMINST_UP:
12302 check_nodes = set(inst.all_nodes)
12303 check_nodes.discard(self.op.node_name)
12304 for inst_node_name in check_nodes:
12305 self._CheckFaultyDisks(inst, inst_node_name)
12307 def Exec(self, feedback_fn):
12308 feedback_fn("Repairing storage unit '%s' on %s ..." %
12309 (self.op.name, self.op.node_name))
12311 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12312 result = self.rpc.call_storage_execute(self.op.node_name,
12313 self.op.storage_type, st_args,
12315 constants.SO_FIX_CONSISTENCY)
12316 result.Raise("Failed to repair storage unit '%s' on %s" %
12317 (self.op.name, self.op.node_name))
12320 class LUNodeEvacuate(NoHooksLU):
12321 """Evacuates instances off a list of nodes.
12326 _MODE2IALLOCATOR = {
12327 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12328 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12329 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12331 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12332 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12333 constants.IALLOCATOR_NEVAC_MODES)
12335 def CheckArguments(self):
12336 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12338 def ExpandNames(self):
12339 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12341 if self.op.remote_node is not None:
12342 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12343 assert self.op.remote_node
12345 if self.op.remote_node == self.op.node_name:
12346 raise errors.OpPrereqError("Can not use evacuated node as a new"
12347 " secondary node", errors.ECODE_INVAL)
12349 if self.op.mode != constants.NODE_EVAC_SEC:
12350 raise errors.OpPrereqError("Without the use of an iallocator only"
12351 " secondary instances can be evacuated",
12352 errors.ECODE_INVAL)
12355 self.share_locks = _ShareAll()
12356 self.needed_locks = {
12357 locking.LEVEL_INSTANCE: [],
12358 locking.LEVEL_NODEGROUP: [],
12359 locking.LEVEL_NODE: [],
12362 # Determine nodes (via group) optimistically, needs verification once locks
12363 # have been acquired
12364 self.lock_nodes = self._DetermineNodes()
12366 def _DetermineNodes(self):
12367 """Gets the list of nodes to operate on.
12370 if self.op.remote_node is None:
12371 # Iallocator will choose any node(s) in the same group
12372 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12374 group_nodes = frozenset([self.op.remote_node])
12376 # Determine nodes to be locked
12377 return set([self.op.node_name]) | group_nodes
12379 def _DetermineInstances(self):
12380 """Builds list of instances to operate on.
12383 assert self.op.mode in constants.NODE_EVAC_MODES
12385 if self.op.mode == constants.NODE_EVAC_PRI:
12386 # Primary instances only
12387 inst_fn = _GetNodePrimaryInstances
12388 assert self.op.remote_node is None, \
12389 "Evacuating primary instances requires iallocator"
12390 elif self.op.mode == constants.NODE_EVAC_SEC:
12391 # Secondary instances only
12392 inst_fn = _GetNodeSecondaryInstances
12395 assert self.op.mode == constants.NODE_EVAC_ALL
12396 inst_fn = _GetNodeInstances
12397 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12399 raise errors.OpPrereqError("Due to an issue with the iallocator"
12400 " interface it is not possible to evacuate"
12401 " all instances at once; specify explicitly"
12402 " whether to evacuate primary or secondary"
12404 errors.ECODE_INVAL)
12406 return inst_fn(self.cfg, self.op.node_name)
12408 def DeclareLocks(self, level):
12409 if level == locking.LEVEL_INSTANCE:
12410 # Lock instances optimistically, needs verification once node and group
12411 # locks have been acquired
12412 self.needed_locks[locking.LEVEL_INSTANCE] = \
12413 set(i.name for i in self._DetermineInstances())
12415 elif level == locking.LEVEL_NODEGROUP:
12416 # Lock node groups for all potential target nodes optimistically, needs
12417 # verification once nodes have been acquired
12418 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12419 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12421 elif level == locking.LEVEL_NODE:
12422 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12424 def CheckPrereq(self):
12426 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12427 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12428 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12430 need_nodes = self._DetermineNodes()
12432 if not owned_nodes.issuperset(need_nodes):
12433 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12434 " locks were acquired, current nodes are"
12435 " are '%s', used to be '%s'; retry the"
12437 (self.op.node_name,
12438 utils.CommaJoin(need_nodes),
12439 utils.CommaJoin(owned_nodes)),
12440 errors.ECODE_STATE)
12442 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12443 if owned_groups != wanted_groups:
12444 raise errors.OpExecError("Node groups changed since locks were acquired,"
12445 " current groups are '%s', used to be '%s';"
12446 " retry the operation" %
12447 (utils.CommaJoin(wanted_groups),
12448 utils.CommaJoin(owned_groups)))
12450 # Determine affected instances
12451 self.instances = self._DetermineInstances()
12452 self.instance_names = [i.name for i in self.instances]
12454 if set(self.instance_names) != owned_instances:
12455 raise errors.OpExecError("Instances on node '%s' changed since locks"
12456 " were acquired, current instances are '%s',"
12457 " used to be '%s'; retry the operation" %
12458 (self.op.node_name,
12459 utils.CommaJoin(self.instance_names),
12460 utils.CommaJoin(owned_instances)))
12462 if self.instance_names:
12463 self.LogInfo("Evacuating instances from node '%s': %s",
12465 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12467 self.LogInfo("No instances to evacuate from node '%s'",
12470 if self.op.remote_node is not None:
12471 for i in self.instances:
12472 if i.primary_node == self.op.remote_node:
12473 raise errors.OpPrereqError("Node %s is the primary node of"
12474 " instance %s, cannot use it as"
12476 (self.op.remote_node, i.name),
12477 errors.ECODE_INVAL)
12479 def Exec(self, feedback_fn):
12480 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12482 if not self.instance_names:
12483 # No instances to evacuate
12486 elif self.op.iallocator is not None:
12487 # TODO: Implement relocation to other group
12488 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12489 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12490 instances=list(self.instance_names))
12491 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12493 ial.Run(self.op.iallocator)
12495 if not ial.success:
12496 raise errors.OpPrereqError("Can't compute node evacuation using"
12497 " iallocator '%s': %s" %
12498 (self.op.iallocator, ial.info),
12499 errors.ECODE_NORES)
12501 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12503 elif self.op.remote_node is not None:
12504 assert self.op.mode == constants.NODE_EVAC_SEC
12506 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12507 remote_node=self.op.remote_node,
12509 mode=constants.REPLACE_DISK_CHG,
12510 early_release=self.op.early_release)]
12511 for instance_name in self.instance_names]
12514 raise errors.ProgrammerError("No iallocator or remote node")
12516 return ResultWithJobs(jobs)
12519 def _SetOpEarlyRelease(early_release, op):
12520 """Sets C{early_release} flag on opcodes if available.
12524 op.early_release = early_release
12525 except AttributeError:
12526 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12531 def _NodeEvacDest(use_nodes, group, nodes):
12532 """Returns group or nodes depending on caller's choice.
12536 return utils.CommaJoin(nodes)
12541 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12542 """Unpacks the result of change-group and node-evacuate iallocator requests.
12544 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12545 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12547 @type lu: L{LogicalUnit}
12548 @param lu: Logical unit instance
12549 @type alloc_result: tuple/list
12550 @param alloc_result: Result from iallocator
12551 @type early_release: bool
12552 @param early_release: Whether to release locks early if possible
12553 @type use_nodes: bool
12554 @param use_nodes: Whether to display node names instead of groups
12557 (moved, failed, jobs) = alloc_result
12560 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12561 for (name, reason) in failed)
12562 lu.LogWarning("Unable to evacuate instances %s", failreason)
12563 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12566 lu.LogInfo("Instances to be moved: %s",
12567 utils.CommaJoin("%s (to %s)" %
12568 (name, _NodeEvacDest(use_nodes, group, nodes))
12569 for (name, group, nodes) in moved))
12571 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12572 map(opcodes.OpCode.LoadOpCode, ops))
12576 def _DiskSizeInBytesToMebibytes(lu, size):
12577 """Converts a disk size in bytes to mebibytes.
12579 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12582 (mib, remainder) = divmod(size, 1024 * 1024)
12585 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12586 " to not overwrite existing data (%s bytes will not be"
12587 " wiped)", (1024 * 1024) - remainder)
12593 class LUInstanceGrowDisk(LogicalUnit):
12594 """Grow a disk of an instance.
12597 HPATH = "disk-grow"
12598 HTYPE = constants.HTYPE_INSTANCE
12601 def ExpandNames(self):
12602 self._ExpandAndLockInstance()
12603 self.needed_locks[locking.LEVEL_NODE] = []
12604 self.needed_locks[locking.LEVEL_NODE_RES] = []
12605 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12606 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12608 def DeclareLocks(self, level):
12609 if level == locking.LEVEL_NODE:
12610 self._LockInstancesNodes()
12611 elif level == locking.LEVEL_NODE_RES:
12613 self.needed_locks[locking.LEVEL_NODE_RES] = \
12614 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12616 def BuildHooksEnv(self):
12617 """Build hooks env.
12619 This runs on the master, the primary and all the secondaries.
12623 "DISK": self.op.disk,
12624 "AMOUNT": self.op.amount,
12625 "ABSOLUTE": self.op.absolute,
12627 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12630 def BuildHooksNodes(self):
12631 """Build hooks nodes.
12634 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12637 def CheckPrereq(self):
12638 """Check prerequisites.
12640 This checks that the instance is in the cluster.
12643 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12644 assert instance is not None, \
12645 "Cannot retrieve locked instance %s" % self.op.instance_name
12646 nodenames = list(instance.all_nodes)
12647 for node in nodenames:
12648 _CheckNodeOnline(self, node)
12650 self.instance = instance
12652 if instance.disk_template not in constants.DTS_GROWABLE:
12653 raise errors.OpPrereqError("Instance's disk layout does not support"
12654 " growing", errors.ECODE_INVAL)
12656 self.disk = instance.FindDisk(self.op.disk)
12658 if self.op.absolute:
12659 self.target = self.op.amount
12660 self.delta = self.target - self.disk.size
12662 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12663 "current disk size (%s)" %
12664 (utils.FormatUnit(self.target, "h"),
12665 utils.FormatUnit(self.disk.size, "h")),
12666 errors.ECODE_STATE)
12668 self.delta = self.op.amount
12669 self.target = self.disk.size + self.delta
12671 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12672 utils.FormatUnit(self.delta, "h"),
12673 errors.ECODE_INVAL)
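# Example with illustrative figures: growing a 10240 MiB disk with
# amount=5120 in relative mode gives delta == 5120 and target == 15360;
# requesting amount=15360 with absolute=True yields the same delta of
# 5120, since delta is then computed as the target minus the current size.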
12675 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12677 def _CheckDiskSpace(self, nodenames, req_vgspace):
12678 template = self.instance.disk_template
12679 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12680 # TODO: check the free disk space for file, when that feature will be
12682 nodes = map(self.cfg.GetNodeInfo, nodenames)
12683 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12686 # With exclusive storage we need to do something smarter than just looking
12687 # at free space; for now, let's simply abort the operation.
12688 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12689 " is enabled", errors.ECODE_STATE)
12690 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12692 def Exec(self, feedback_fn):
12693 """Execute disk grow.
12696 instance = self.instance
12699 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12700 assert (self.owned_locks(locking.LEVEL_NODE) ==
12701 self.owned_locks(locking.LEVEL_NODE_RES))
12703 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12705 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12707 raise errors.OpExecError("Cannot activate block device to grow")
12709 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12710 (self.op.disk, instance.name,
12711 utils.FormatUnit(self.delta, "h"),
12712 utils.FormatUnit(self.target, "h")))
12714 # First run all grow ops in dry-run mode
12715 for node in instance.all_nodes:
12716 self.cfg.SetDiskID(disk, node)
12717 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12719 result.Raise("Dry-run grow request failed to node %s" % node)
12722 # Get disk size from primary node for wiping
12723 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12724 result.Raise("Failed to retrieve disk size from node '%s'" %
12725 instance.primary_node)
12727 (disk_size_in_bytes, ) = result.payload
12729 if disk_size_in_bytes is None:
12730 raise errors.OpExecError("Failed to retrieve disk size from primary"
12731 " node '%s'" % instance.primary_node)
12733 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12735 assert old_disk_size >= disk.size, \
12736 ("Retrieved disk size too small (got %s, should be at least %s)" %
12737 (old_disk_size, disk.size))
12739 old_disk_size = None
12741 # We know that (as far as we can test) operations across different
12742 # nodes will succeed; time to run it for real on the backing storage
12743 for node in instance.all_nodes:
12744 self.cfg.SetDiskID(disk, node)
12745 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12747 result.Raise("Grow request failed to node %s" % node)
12749 # And now execute it for logical storage, on the primary node
12750 node = instance.primary_node
12751 self.cfg.SetDiskID(disk, node)
12752 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12754 result.Raise("Grow request failed to node %s" % node)
12756 disk.RecordGrow(self.delta)
12757 self.cfg.Update(instance, feedback_fn)
12759 # Changes have been recorded, release node lock
12760 _ReleaseLocks(self, locking.LEVEL_NODE)
12762 # Downgrade lock while waiting for sync
12763 self.glm.downgrade(locking.LEVEL_INSTANCE)
12765 assert wipe_disks ^ (old_disk_size is None)
12768 assert instance.disks[self.op.disk] == disk
12770 # Wipe newly added disk space
12771 _WipeDisks(self, instance,
12772 disks=[(self.op.disk, disk, old_disk_size)])
12774 if self.op.wait_for_sync:
12775 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12777 self.LogWarning("Disk syncing has not returned a good status; check"
12779 if instance.admin_state != constants.ADMINST_UP:
12780 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12781 elif instance.admin_state != constants.ADMINST_UP:
12782 self.LogWarning("Not shutting down the disk even if the instance is"
12783 " not supposed to be running because no wait for"
12784 " sync mode was requested")
12786 assert self.owned_locks(locking.LEVEL_NODE_RES)
12787 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12790 class LUInstanceQueryData(NoHooksLU):
12791 """Query runtime instance data.
12796 def ExpandNames(self):
12797 self.needed_locks = {}
12799 # Use locking if requested or when non-static information is wanted
12800 if not (self.op.static or self.op.use_locking):
12801 self.LogWarning("Non-static data requested, locks need to be acquired")
12802 self.op.use_locking = True
12804 if self.op.instances or not self.op.use_locking:
12805 # Expand instance names right here
12806 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12808 # Will use acquired locks
12809 self.wanted_names = None
12811 if self.op.use_locking:
12812 self.share_locks = _ShareAll()
12814 if self.wanted_names is None:
12815 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12817 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12819 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12820 self.needed_locks[locking.LEVEL_NODE] = []
12821 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12823 def DeclareLocks(self, level):
12824 if self.op.use_locking:
12825 if level == locking.LEVEL_NODEGROUP:
12826 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12828 # Lock all groups used by instances optimistically; this requires going
12829 # via the node before it's locked, requiring verification later on
12830 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12831 frozenset(group_uuid
12832 for instance_name in owned_instances
12834 self.cfg.GetInstanceNodeGroups(instance_name))
12836 elif level == locking.LEVEL_NODE:
12837 self._LockInstancesNodes()
12839 def CheckPrereq(self):
12840 """Check prerequisites.
12842 This only checks the optional instance list against the existing names.
12845 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12846 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12847 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12849 if self.wanted_names is None:
12850 assert self.op.use_locking, "Locking was not used"
12851 self.wanted_names = owned_instances
12853 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12855 if self.op.use_locking:
12856 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12859 assert not (owned_instances or owned_groups or owned_nodes)
12861 self.wanted_instances = instances.values()
12863 def _ComputeBlockdevStatus(self, node, instance, dev):
12864 """Returns the status of a block device
12867 if self.op.static or not node:
12870 self.cfg.SetDiskID(dev, node)
12872 result = self.rpc.call_blockdev_find(node, dev)
12876 result.Raise("Can't compute disk status for %s" % instance.name)
12878 status = result.payload
12882 return (status.dev_path, status.major, status.minor,
12883 status.sync_percent, status.estimated_time,
12884 status.is_degraded, status.ldisk_status)
12886 def _ComputeDiskStatus(self, instance, snode, dev):
12887 """Compute block device status.
12890 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12892 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12894 def _ComputeDiskStatusInner(self, instance, snode, dev):
12895 """Compute block device status.
12897 @attention: The device has to be annotated already.
12900 if dev.dev_type in constants.LDS_DRBD:
12901 # we change the snode then (otherwise we use the one passed in)
12902 if dev.logical_id[0] == instance.primary_node:
12903 snode = dev.logical_id[1]
12905 snode = dev.logical_id[0]
12907 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12909 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12912 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12919 "iv_name": dev.iv_name,
12920 "dev_type": dev.dev_type,
12921 "logical_id": dev.logical_id,
12922 "physical_id": dev.physical_id,
12923 "pstatus": dev_pstatus,
12924 "sstatus": dev_sstatus,
12925 "children": dev_children,
12930 def Exec(self, feedback_fn):
12931 """Gather and return data"""
12934 cluster = self.cfg.GetClusterInfo()
12936 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12937 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12939 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12940 for node in nodes.values()))
12942 group2name_fn = lambda uuid: groups[uuid].name
12944 for instance in self.wanted_instances:
12945 pnode = nodes[instance.primary_node]
12947 if self.op.static or pnode.offline:
12948 remote_state = None
12950 self.LogWarning("Primary node %s is marked offline, returning static"
12951 " information only for instance %s" %
12952 (pnode.name, instance.name))
12954 remote_info = self.rpc.call_instance_info(instance.primary_node,
12956 instance.hypervisor)
12957 remote_info.Raise("Error checking node %s" % instance.primary_node)
12958 remote_info = remote_info.payload
12959 if remote_info and "state" in remote_info:
12960 remote_state = "up"
12962 if instance.admin_state == constants.ADMINST_UP:
12963 remote_state = "down"
12965 remote_state = instance.admin_state
12967 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12970 snodes_group_uuids = [nodes[snode_name].group
12971 for snode_name in instance.secondary_nodes]
12973 result[instance.name] = {
12974 "name": instance.name,
12975 "config_state": instance.admin_state,
12976 "run_state": remote_state,
12977 "pnode": instance.primary_node,
12978 "pnode_group_uuid": pnode.group,
12979 "pnode_group_name": group2name_fn(pnode.group),
12980 "snodes": instance.secondary_nodes,
12981 "snodes_group_uuids": snodes_group_uuids,
12982 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12984 # this happens to be the same format used for hooks
12985 "nics": _NICListToTuple(self, instance.nics),
12986 "disk_template": instance.disk_template,
12988 "hypervisor": instance.hypervisor,
12989 "network_port": instance.network_port,
12990 "hv_instance": instance.hvparams,
12991 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12992 "be_instance": instance.beparams,
12993 "be_actual": cluster.FillBE(instance),
12994 "os_instance": instance.osparams,
12995 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12996 "serial_no": instance.serial_no,
12997 "mtime": instance.mtime,
12998 "ctime": instance.ctime,
12999 "uuid": instance.uuid,
13005 def PrepareContainerMods(mods, private_fn):
13006 """Prepares a list of container modifications by adding a private data field.
13008 @type mods: list of tuples; (operation, index, parameters)
13009 @param mods: List of modifications
13010 @type private_fn: callable or None
13011 @param private_fn: Callable for constructing a private data field for a modification
13016 if private_fn is None:
13021 return [(op, idx, params, fn()) for (op, idx, params) in mods]
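# Minimal usage sketch (illustrative only; the parameter values are
# hypothetical). The opcode's disk/NIC modification lists are turned into
# 4-tuples that carry a per-modification private object:
#
#   mods = [(constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO})]
#   nicmod = PrepareContainerMods(mods, _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate object>)]
#
# With private_fn=None the private field is simply None.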
13024 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
13026 _TApplyContModsCbChanges = \
13027 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13028 ht.TNonEmptyString,
13033 def ApplyContainerMods(kind, container, chgdesc, mods,
13034 create_fn, modify_fn, remove_fn):
13035 """Applies descriptions in C{mods} to C{container}.
13038 @param kind: One-word item description
13039 @type container: list
13040 @param container: Container to modify
13041 @type chgdesc: None or list
13042 @param chgdesc: List of applied changes
13044 @param mods: Modifications as returned by L{PrepareContainerMods}
13045 @type create_fn: callable
13046 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13047 receives absolute item index, parameters and private data object as added
13048 by L{PrepareContainerMods}, returns tuple containing new item and changes
13050 @type modify_fn: callable
13051 @param modify_fn: Callback for modifying an existing item
13052 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13053 and private data object as added by L{PrepareContainerMods}, returns
13055 @type remove_fn: callable
13056 @param remove_fn: Callback on removing item; receives absolute item index,
13057 item and private data object as added by L{PrepareContainerMods}
13060 for (op, idx, params, private) in mods:
13063 absidx = len(container) - 1
13065 raise IndexError("Not accepting negative indices other than -1")
13066 elif idx > len(container):
13067 raise IndexError("Got %s index %s, but there are only %s" %
13068 (kind, idx, len(container)))
13074 if op == constants.DDM_ADD:
13075 # Calculate where item will be added
13077 addidx = len(container)
13081 if create_fn is None:
13084 (item, changes) = create_fn(addidx, params, private)
13087 container.append(item)
13090 assert idx <= len(container)
13091 # list.insert does so before the specified index
13092 container.insert(idx, item)
13094 # Retrieve existing item
13096 item = container[absidx]
13098 raise IndexError("Invalid %s index %s" % (kind, idx))
13100 if op == constants.DDM_REMOVE:
13103 if remove_fn is not None:
13104 remove_fn(absidx, item, private)
13106 changes = [("%s/%s" % (kind, absidx), "remove")]
13108 assert container[absidx] == item
13109 del container[absidx]
13110 elif op == constants.DDM_MODIFY:
13111 if modify_fn is not None:
13112 changes = modify_fn(absidx, item, params, private)
13114 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13116 assert _TApplyContModsCbChanges(changes)
13118 if not (chgdesc is None or changes is None):
13119 chgdesc.extend(changes)
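# Rough usage sketch (illustrative; create_fn/modify_fn/remove_fn stand for
# the callbacks defined by LUInstanceSetParams further below). The same
# helper drives both the prereq dry-run and the real modification:
#
#   chgdesc = []
#   ApplyContainerMods("disk", instance.disks, chgdesc, diskmod,
#                      create_fn, modify_fn, remove_fn)
#
# Afterwards chgdesc contains (name, change) pairs matching
# _TApplyContModsCbChanges, e.g. ("disk/0", "add:size=1024,mode=rw") or
# ("disk.mode/1", "ro").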
13122 def _UpdateIvNames(base_index, disks):
13123 """Updates the C{iv_name} attribute of disks.
13125 @type disks: list of L{objects.Disk}
13128 for (idx, disk) in enumerate(disks):
13129 disk.iv_name = "disk/%s" % (base_index + idx, )
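# For example, _UpdateIvNames(0, instance.disks), as used by
# LUInstanceSetParams.Exec below, renumbers the volumes to "disk/0",
# "disk/1", ... after disks have been added or removed.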
13132 class _InstNicModPrivate:
13133 """Data structure for network interface modifications.
13135 Used by L{LUInstanceSetParams}.
13138 def __init__(self):
13143 class LUInstanceSetParams(LogicalUnit):
13144 """Modifies an instances's parameters.
13147 HPATH = "instance-modify"
13148 HTYPE = constants.HTYPE_INSTANCE
13152 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13153 assert ht.TList(mods)
13154 assert not mods or len(mods[0]) in (2, 3)
13156 if mods and len(mods[0]) == 2:
13160 for op, params in mods:
13161 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13162 result.append((op, -1, params))
13166 raise errors.OpPrereqError("Only one %s add or remove operation is"
13167 " supported at a time" % kind,
13168 errors.ECODE_INVAL)
13170 result.append((constants.DDM_MODIFY, op, params))
13172 assert verify_fn(result)
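# Illustrative upgrade (hypothetical parameter values): the legacy 2-tuple
# format
#   [(constants.DDM_ADD, {"size": 1024}), (0, {"mode": "ro"})]
# becomes the 3-tuple format
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# and only a single add or remove per list is accepted in the legacy form.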
13179 def _CheckMods(kind, mods, key_types, item_fn):
13180 """Ensures requested disk/NIC modifications are valid.
13183 for (op, _, params) in mods:
13184 assert ht.TDict(params)
13186 # If 'key_types' is an empty dict, we assume we have an
13187 # 'ext' template and thus do not ForceDictType
13189 utils.ForceDictType(params, key_types)
13191 if op == constants.DDM_REMOVE:
13193 raise errors.OpPrereqError("No settings should be passed when"
13194 " removing a %s" % kind,
13195 errors.ECODE_INVAL)
13196 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13197 item_fn(op, params)
13199 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13202 def _VerifyDiskModification(op, params):
13203 """Verifies a disk modification.
13206 if op == constants.DDM_ADD:
13207 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13208 if mode not in constants.DISK_ACCESS_SET:
13209 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13210 errors.ECODE_INVAL)
13212 size = params.get(constants.IDISK_SIZE, None)
13214 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13215 constants.IDISK_SIZE, errors.ECODE_INVAL)
13219 except (TypeError, ValueError), err:
13220 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13221 errors.ECODE_INVAL)
13223 params[constants.IDISK_SIZE] = size
13225 elif op == constants.DDM_MODIFY:
13226 if constants.IDISK_SIZE in params:
13227 raise errors.OpPrereqError("Disk size change not possible, use"
13228 " grow-disk", errors.ECODE_INVAL)
13229 if constants.IDISK_MODE not in params:
13230 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13231 " modification supported, but missing",
13232 errors.ECODE_NOENT)
13233 if len(params) > 1:
13234 raise errors.OpPrereqError("Disk modification doesn't support"
13235 " additional arbitrary parameters",
13236 errors.ECODE_INVAL)
13239 def _VerifyNicModification(op, params):
13240 """Verifies a network interface modification.
13243 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13244 ip = params.get(constants.INIC_IP, None)
13245 req_net = params.get(constants.INIC_NETWORK, None)
13246 link = params.get(constants.NIC_LINK, None)
13247 mode = params.get(constants.NIC_MODE, None)
13248 if req_net is not None:
13249 if req_net.lower() == constants.VALUE_NONE:
13250 params[constants.INIC_NETWORK] = None
13252 elif link is not None or mode is not None:
13253 raise errors.OpPrereqError("If network is given"
13254 " mode or link should not",
13255 errors.ECODE_INVAL)
13257 if op == constants.DDM_ADD:
13258 macaddr = params.get(constants.INIC_MAC, None)
13259 if macaddr is None:
13260 params[constants.INIC_MAC] = constants.VALUE_AUTO
13263 if ip.lower() == constants.VALUE_NONE:
13264 params[constants.INIC_IP] = None
13266 if ip.lower() == constants.NIC_IP_POOL:
13267 if op == constants.DDM_ADD and req_net is None:
13268 raise errors.OpPrereqError("If ip=pool, parameter network"
13270 errors.ECODE_INVAL)
13272 if not netutils.IPAddress.IsValid(ip):
13273 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13274 errors.ECODE_INVAL)
13276 if constants.INIC_MAC in params:
13277 macaddr = params[constants.INIC_MAC]
13278 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13279 macaddr = utils.NormalizeAndValidateMac(macaddr)
13281 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13282 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13283 " modifying an existing NIC",
13284 errors.ECODE_INVAL)
13286 def CheckArguments(self):
13287 if not (self.op.nics or self.op.disks or self.op.disk_template or
13288 self.op.hvparams or self.op.beparams or self.op.os_name or
13289 self.op.offline is not None or self.op.runtime_mem):
13290 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13292 if self.op.hvparams:
13293 _CheckGlobalHvParams(self.op.hvparams)
13295 self.op.disks = self._UpgradeDiskNicMods(
13296 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13297 self.op.nics = self._UpgradeDiskNicMods(
13298 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13300 if self.op.disks and self.op.disk_template is not None:
13301 raise errors.OpPrereqError("Disk template conversion and other disk"
13302 " changes not supported at the same time",
13303 errors.ECODE_INVAL)
13305 if (self.op.disk_template and
13306 self.op.disk_template in constants.DTS_INT_MIRROR and
13307 self.op.remote_node is None):
13308 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13309 " one requires specifying a secondary node",
13310 errors.ECODE_INVAL)
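# Hedged example (assuming the standard gnt-instance CLI): a plain-to-drbd
# conversion therefore has to name the new secondary node, roughly
#
#   gnt-instance modify -t drbd -n node2.example.com instance1
#
# while converting back to plain needs no -n argument.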
13312 # Check NIC modifications
13313 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13314 self._VerifyNicModification)
13316 def ExpandNames(self):
13317 self._ExpandAndLockInstance()
13318 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13319 # Can't even acquire node locks in shared mode as upcoming changes in
13320 # Ganeti 2.6 will start to modify the node object on disk conversion
13321 self.needed_locks[locking.LEVEL_NODE] = []
13322 self.needed_locks[locking.LEVEL_NODE_RES] = []
13323 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13324 # Lock the node group to look up the ipolicy
13325 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13327 def DeclareLocks(self, level):
13328 if level == locking.LEVEL_NODEGROUP:
13329 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13330 # Acquire locks for the instance's nodegroups optimistically. Needs
13331 # to be verified in CheckPrereq
13332 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13333 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13334 elif level == locking.LEVEL_NODE:
13335 self._LockInstancesNodes()
13336 if self.op.disk_template and self.op.remote_node:
13337 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13338 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13339 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13341 self.needed_locks[locking.LEVEL_NODE_RES] = \
13342 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13344 def BuildHooksEnv(self):
13345 """Build hooks env.
13347 This runs on the master, primary and secondaries.
13351 if constants.BE_MINMEM in self.be_new:
13352 args["minmem"] = self.be_new[constants.BE_MINMEM]
13353 if constants.BE_MAXMEM in self.be_new:
13354 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13355 if constants.BE_VCPUS in self.be_new:
13356 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13357 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13358 # information at all.
13360 if self._new_nics is not None:
13363 for nic in self._new_nics:
13364 n = copy.deepcopy(nic)
13365 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13366 n.nicparams = nicparams
13367 nics.append(_NICToTuple(self, n))
13369 args["nics"] = nics
13371 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13372 if self.op.disk_template:
13373 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13374 if self.op.runtime_mem:
13375 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13379 def BuildHooksNodes(self):
13380 """Build hooks nodes.
13383 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13386 def _PrepareNicModification(self, params, private, old_ip, old_net,
13387 old_params, cluster, pnode):
13389 update_params_dict = dict([(key, params[key])
13390 for key in constants.NICS_PARAMETERS
13393 req_link = update_params_dict.get(constants.NIC_LINK, None)
13394 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13396 new_net = params.get(constants.INIC_NETWORK, old_net)
13397 if new_net is not None:
13398 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13399 if netparams is None:
13400 raise errors.OpPrereqError("No netparams found for the network"
13401 " %s, probably not connected" % new_net,
13402 errors.ECODE_INVAL)
13403 new_params = dict(netparams)
13405 new_params = _GetUpdatedParams(old_params, update_params_dict)
13407 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13409 new_filled_params = cluster.SimpleFillNIC(new_params)
13410 objects.NIC.CheckParameterSyntax(new_filled_params)
13412 new_mode = new_filled_params[constants.NIC_MODE]
13413 if new_mode == constants.NIC_MODE_BRIDGED:
13414 bridge = new_filled_params[constants.NIC_LINK]
13415 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13417 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13419 self.warn.append(msg)
13421 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13423 elif new_mode == constants.NIC_MODE_ROUTED:
13424 ip = params.get(constants.INIC_IP, old_ip)
13426 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13427 " on a routed NIC", errors.ECODE_INVAL)
13429 elif new_mode == constants.NIC_MODE_OVS:
13430 # TODO: check OVS link
13431 self.LogInfo("OVS links are currently not checked for correctness")
13433 if constants.INIC_MAC in params:
13434 mac = params[constants.INIC_MAC]
13436 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13437 errors.ECODE_INVAL)
13438 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13439 # otherwise generate the MAC address
13440 params[constants.INIC_MAC] = \
13441 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13443 # or validate/reserve the current one
13445 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13446 except errors.ReservationError:
13447 raise errors.OpPrereqError("MAC address '%s' already in use"
13448 " in cluster" % mac,
13449 errors.ECODE_NOTUNIQUE)
13450 elif new_net != old_net:
13452 def get_net_prefix(net):
13454 uuid = self.cfg.LookupNetwork(net)
13456 nobj = self.cfg.GetNetwork(uuid)
13457 return nobj.mac_prefix
13460 new_prefix = get_net_prefix(new_net)
13461 old_prefix = get_net_prefix(old_net)
13462 if old_prefix != new_prefix:
13463 params[constants.INIC_MAC] = \
13464 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13466 # if there is a change in nic-network configuration
13467 new_ip = params.get(constants.INIC_IP, old_ip)
13468 if (new_ip, new_net) != (old_ip, old_net):
13471 if new_ip.lower() == constants.NIC_IP_POOL:
13473 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13474 except errors.ReservationError:
13475 raise errors.OpPrereqError("Unable to get a free IP"
13476 " from the address pool",
13477 errors.ECODE_STATE)
13478 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13479 params[constants.INIC_IP] = new_ip
13480 elif new_ip != old_ip or new_net != old_net:
13482 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13483 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13484 except errors.ReservationError:
13485 raise errors.OpPrereqError("IP %s not available in network %s" %
13487 errors.ECODE_NOTUNIQUE)
13488 elif new_ip.lower() == constants.NIC_IP_POOL:
13489 raise errors.OpPrereqError("ip=pool, but no network found",
13490 errors.ECODE_INVAL)
13493 elif self.op.conflicts_check:
13494 _CheckForConflictingIp(self, new_ip, pnode)
13499 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13500 except errors.AddressPoolError:
13501 logging.warning("Release IP %s not contained in network %s",
13504 # there are no changes in (net, ip) tuple
13505 elif (old_net is not None and
13506 (req_link is not None or req_mode is not None)):
13507 raise errors.OpPrereqError("Not allowed to change link or mode of"
13508 " a NIC that is connected to a network",
13509 errors.ECODE_INVAL)
13511 private.params = new_params
13512 private.filled = new_filled_params
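# Illustrative parameter sets handled above (all values are hypothetical):
#   {constants.INIC_NETWORK: "net1", constants.INIC_IP: constants.NIC_IP_POOL}
#     -> attach the NIC to "net1" and allocate an address from its pool
#   {constants.INIC_IP: "192.0.2.10"}
#     -> reserve that address in the NIC's (new or current) network, if any
# A routed NIC must keep an IP address, and link/mode cannot be changed on a
# NIC that is connected to a network.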
13514 def _PreCheckDiskTemplate(self, pnode_info):
13515 """CheckPrereq checks related to a new disk template."""
13516 # Arguments are passed to avoid configuration lookups
13517 instance = self.instance
13518 pnode = instance.primary_node
13519 cluster = self.cluster
13520 if instance.disk_template == self.op.disk_template:
13521 raise errors.OpPrereqError("Instance already has disk template %s" %
13522 instance.disk_template, errors.ECODE_INVAL)
13524 if (instance.disk_template,
13525 self.op.disk_template) not in self._DISK_CONVERSIONS:
13526 raise errors.OpPrereqError("Unsupported disk template conversion from"
13527 " %s to %s" % (instance.disk_template,
13528 self.op.disk_template),
13529 errors.ECODE_INVAL)
13530 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13531 msg="cannot change disk template")
13532 if self.op.disk_template in constants.DTS_INT_MIRROR:
13533 if self.op.remote_node == pnode:
13534 raise errors.OpPrereqError("Given new secondary node %s is the same"
13535 " as the primary node of the instance" %
13536 self.op.remote_node, errors.ECODE_STATE)
13537 _CheckNodeOnline(self, self.op.remote_node)
13538 _CheckNodeNotDrained(self, self.op.remote_node)
13539 # FIXME: here we assume that the old instance type is DT_PLAIN
13540 assert instance.disk_template == constants.DT_PLAIN
13541 disks = [{constants.IDISK_SIZE: d.size,
13542 constants.IDISK_VG: d.logical_id[0]}
13543 for d in instance.disks]
13544 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13545 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13547 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13548 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13549 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13551 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13552 ignore=self.op.ignore_ipolicy)
13553 if pnode_info.group != snode_info.group:
13554 self.LogWarning("The primary and secondary nodes are in two"
13555 " different node groups; the disk parameters"
13556 " from the first disk's node group will be"
13559 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13560 # Make sure none of the nodes require exclusive storage
13561 nodes = [pnode_info]
13562 if self.op.disk_template in constants.DTS_INT_MIRROR:
13564 nodes.append(snode_info)
13565 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13566 if compat.any(map(has_es, nodes)):
13567 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13568 " storage is enabled" % (instance.disk_template,
13569 self.op.disk_template))
13570 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13572 def CheckPrereq(self):
13573 """Check prerequisites.
13575 This only checks the instance list against the existing names.
13578 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13579 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13581 cluster = self.cluster = self.cfg.GetClusterInfo()
13582 assert self.instance is not None, \
13583 "Cannot retrieve locked instance %s" % self.op.instance_name
13585 pnode = instance.primary_node
13586 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13587 nodelist = list(instance.all_nodes)
13588 pnode_info = self.cfg.GetNodeInfo(pnode)
13589 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13591 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13592 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13593 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13595 # dictionary with instance information after the modification
13598 # Check disk modifications. This is done here and not in CheckArguments
13599 # (as with NICs), because we need to know the instance's disk template
13600 if instance.disk_template == constants.DT_EXT:
13601 self._CheckMods("disk", self.op.disks, {},
13602 self._VerifyDiskModification)
13604 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13605 self._VerifyDiskModification)
13607 # Prepare disk/NIC modifications
13608 self.diskmod = PrepareContainerMods(self.op.disks, None)
13609 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13611 # Check the validity of the `provider' parameter
13612 if instance.disk_template == constants.DT_EXT:
13613 for mod in self.diskmod:
13614 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13615 if mod[0] == constants.DDM_ADD:
13616 if ext_provider is None:
13617 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13618 " '%s' missing, during disk add" %
13620 constants.IDISK_PROVIDER),
13621 errors.ECODE_NOENT)
13622 elif mod[0] == constants.DDM_MODIFY:
13624 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13626 constants.IDISK_PROVIDER,
13627 errors.ECODE_INVAL)
13629 for mod in self.diskmod:
13630 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13631 if ext_provider is not None:
13632 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13633 " instances of type '%s'" %
13634 (constants.IDISK_PROVIDER,
13636 errors.ECODE_INVAL)
13639 if self.op.os_name and not self.op.force:
13640 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13641 self.op.force_variant)
13642 instance_os = self.op.os_name
13644 instance_os = instance.os
13646 assert not (self.op.disk_template and self.op.disks), \
13647 "Can't modify disk template and apply disk changes at the same time"
13649 if self.op.disk_template:
13650 self._PreCheckDiskTemplate(pnode_info)
13652 # hvparams processing
13653 if self.op.hvparams:
13654 hv_type = instance.hypervisor
13655 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13656 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13657 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13660 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13661 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13662 self.hv_proposed = self.hv_new = hv_new # the new actual values
13663 self.hv_inst = i_hvdict # the new dict (without defaults)
13665 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13667 self.hv_new = self.hv_inst = {}
13669 # beparams processing
13670 if self.op.beparams:
13671 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13673 objects.UpgradeBeParams(i_bedict)
13674 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13675 be_new = cluster.SimpleFillBE(i_bedict)
13676 self.be_proposed = self.be_new = be_new # the new actual values
13677 self.be_inst = i_bedict # the new dict (without defaults)
13679 self.be_new = self.be_inst = {}
13680 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13681 be_old = cluster.FillBE(instance)
13683 # CPU param validation -- checking every time a parameter is
13684 # changed to cover all cases where either CPU mask or vcpus have been changed
13686 if (constants.BE_VCPUS in self.be_proposed and
13687 constants.HV_CPU_MASK in self.hv_proposed):
13689 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13690 # Verify mask is consistent with number of vCPUs. Can skip this
13691 # test if only 1 entry in the CPU mask, which means same mask
13692 # is applied to all vCPUs.
13693 if (len(cpu_list) > 1 and
13694 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13695 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13697 (self.be_proposed[constants.BE_VCPUS],
13698 self.hv_proposed[constants.HV_CPU_MASK]),
13699 errors.ECODE_INVAL)
13701 # Only perform this test if a new CPU mask is given
13702 if constants.HV_CPU_MASK in self.hv_new:
13703 # Calculate the largest CPU number requested
13704 max_requested_cpu = max(map(max, cpu_list))
13705 # Check that all of the instance's nodes have enough physical CPUs to
13706 # satisfy the requested CPU mask
13707 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13708 max_requested_cpu + 1, instance.hypervisor)
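# Worked example (the mask syntax is illustrative): with BE_VCPUS = 2, a
# per-vCPU mask such as "1-2:4" parses into two entries and is accepted,
# whereas "1:2:3" (three entries) is rejected; the highest CPU number
# referenced (here 4) must also exist on every node of the instance.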
13710 # osparams processing
13711 if self.op.osparams:
13712 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13713 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13714 self.os_inst = i_osdict # the new dict (without defaults)
13720 #TODO(dynmem): do the appropriate check involving MINMEM
13721 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13722 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13723 mem_check_list = [pnode]
13724 if be_new[constants.BE_AUTO_BALANCE]:
13725 # either we changed auto_balance to yes or it was from before
13726 mem_check_list.extend(instance.secondary_nodes)
13727 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13728 instance.hypervisor)
13729 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13730 [instance.hypervisor], False)
13731 pninfo = nodeinfo[pnode]
13732 msg = pninfo.fail_msg
13734 # Assume the primary node is unreachable and go ahead
13735 self.warn.append("Can't get info from primary node %s: %s" %
13738 (_, _, (pnhvinfo, )) = pninfo.payload
13739 if not isinstance(pnhvinfo.get("memory_free", None), int):
13740 self.warn.append("Node data from primary node %s doesn't contain"
13741 " free memory information" % pnode)
13742 elif instance_info.fail_msg:
13743 self.warn.append("Can't get instance runtime information: %s" %
13744 instance_info.fail_msg)
13746 if instance_info.payload:
13747 current_mem = int(instance_info.payload["memory"])
13749 # Assume instance not running
13750 # (there is a slight race condition here, but it's not very
13751 # probable, and we have no other way to check)
13752 # TODO: Describe race condition
13754 #TODO(dynmem): do the appropriate check involving MINMEM
13755 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13756 pnhvinfo["memory_free"])
13758 raise errors.OpPrereqError("This change will prevent the instance"
13759 " from starting, due to %d MB of memory"
13760 " missing on its primary node" %
13761 miss_mem, errors.ECODE_NORES)
13763 if be_new[constants.BE_AUTO_BALANCE]:
13764 for node, nres in nodeinfo.items():
13765 if node not in instance.secondary_nodes:
13767 nres.Raise("Can't get info from secondary node %s" % node,
13768 prereq=True, ecode=errors.ECODE_STATE)
13769 (_, _, (nhvinfo, )) = nres.payload
13770 if not isinstance(nhvinfo.get("memory_free", None), int):
13771 raise errors.OpPrereqError("Secondary node %s didn't return free"
13772 " memory information" % node,
13773 errors.ECODE_STATE)
13774 #TODO(dynmem): do the appropriate check involving MINMEM
13775 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13776 raise errors.OpPrereqError("This change will prevent the instance"
13777 " from failover to its secondary node"
13778 " %s, due to not enough memory" % node,
13779 errors.ECODE_STATE)
13781 if self.op.runtime_mem:
13782 remote_info = self.rpc.call_instance_info(instance.primary_node,
13784 instance.hypervisor)
13785 remote_info.Raise("Error checking node %s" % instance.primary_node)
13786 if not remote_info.payload: # not running already
13787 raise errors.OpPrereqError("Instance %s is not running" %
13788 instance.name, errors.ECODE_STATE)
13790 current_memory = remote_info.payload["memory"]
13791 if (not self.op.force and
13792 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13793 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13794 raise errors.OpPrereqError("Instance %s must have memory between %d"
13795 " and %d MB of memory unless --force is"
13798 self.be_proposed[constants.BE_MINMEM],
13799 self.be_proposed[constants.BE_MAXMEM]),
13800 errors.ECODE_INVAL)
13802 delta = self.op.runtime_mem - current_memory
13804 _CheckNodeFreeMemory(self, instance.primary_node,
13805 "ballooning memory for instance %s" %
13806 instance.name, delta, instance.hypervisor)
13808 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13809 raise errors.OpPrereqError("Disk operations not supported for"
13810 " diskless instances", errors.ECODE_INVAL)
13812 def _PrepareNicCreate(_, params, private):
13813 self._PrepareNicModification(params, private, None, None,
13814 {}, cluster, pnode)
13815 return (None, None)
13817 def _PrepareNicMod(_, nic, params, private):
13818 self._PrepareNicModification(params, private, nic.ip, nic.network,
13819 nic.nicparams, cluster, pnode)
13822 def _PrepareNicRemove(_, params, __):
13823 ip = params.ip
13824 net = params.network
13825 if net is not None and ip is not None:
13826 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13828 # Verify NIC changes (operating on copy)
13829 nics = instance.nics[:]
13830 ApplyContainerMods("NIC", nics, None, self.nicmod,
13831 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13832 if len(nics) > constants.MAX_NICS:
13833 raise errors.OpPrereqError("Instance has too many network interfaces"
13834 " (%d), cannot add more" % constants.MAX_NICS,
13835 errors.ECODE_STATE)
13837 # Verify disk changes (operating on a copy)
13838 disks = instance.disks[:]
13839 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13840 if len(disks) > constants.MAX_DISKS:
13841 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13842 " more" % constants.MAX_DISKS,
13843 errors.ECODE_STATE)
13844 disk_sizes = [disk.size for disk in instance.disks]
13845 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13846 self.diskmod if op == constants.DDM_ADD)
13847 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13848 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13850 if self.op.offline is not None and self.op.offline:
13851 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13852 msg="can't change to offline")
13854 # Pre-compute NIC changes (necessary to use result in hooks)
13855 self._nic_chgdesc = []
13857 # Operate on copies as this is still in prereq
13858 nics = [nic.Copy() for nic in instance.nics]
13859 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13860 self._CreateNewNic, self._ApplyNicMods, None)
13861 self._new_nics = nics
13862 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13864 self._new_nics = None
13865 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13867 if not self.op.ignore_ipolicy:
13868 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13871 # Fill ispec with backend parameters
13872 ispec[constants.ISPEC_SPINDLE_USE] = \
13873 self.be_new.get(constants.BE_SPINDLE_USE, None)
13874 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13877 # Copy ispec to verify parameters with min/max values separately
13878 ispec_max = ispec.copy()
13879 ispec_max[constants.ISPEC_MEM_SIZE] = \
13880 self.be_new.get(constants.BE_MAXMEM, None)
13881 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13882 ispec_min = ispec.copy()
13883 ispec_min[constants.ISPEC_MEM_SIZE] = \
13884 self.be_new.get(constants.BE_MINMEM, None)
13885 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13887 if (res_max or res_min):
13888 # FIXME: Improve error message by including information about whether
13889 # the upper or lower limit of the parameter fails the ipolicy.
13890 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13891 (group_info, group_info.name,
13892 utils.CommaJoin(set(res_max + res_min))))
13893 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13895 def _ConvertPlainToDrbd(self, feedback_fn):
13896 """Converts an instance from plain to drbd.
13899 feedback_fn("Converting template to drbd")
13900 instance = self.instance
13901 pnode = instance.primary_node
13902 snode = self.op.remote_node
13904 assert instance.disk_template == constants.DT_PLAIN
13906 # create a fake disk info for _GenerateDiskTemplate
13907 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13908 constants.IDISK_VG: d.logical_id[0]}
13909 for d in instance.disks]
13910 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13911 instance.name, pnode, [snode],
13912 disk_info, None, None, 0, feedback_fn,
13914 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13916 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13917 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13918 info = _GetInstanceInfoText(instance)
13919 feedback_fn("Creating additional volumes...")
13920 # first, create the missing data and meta devices
13921 for disk in anno_disks:
13922 # unfortunately this is... not too nice
13923 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13924 info, True, p_excl_stor)
13925 for child in disk.children:
13926 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13928 # at this stage, all new LVs have been created, we can rename the old ones
13930 feedback_fn("Renaming original volumes...")
13931 rename_list = [(o, n.children[0].logical_id)
13932 for (o, n) in zip(instance.disks, new_disks)]
13933 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13934 result.Raise("Failed to rename original LVs")
13936 feedback_fn("Initializing DRBD devices...")
13937 # all child devices are in place, we can now create the DRBD devices
13938 for disk in anno_disks:
13939 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13940 f_create = node == pnode
13941 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13944 # at this point, the instance has been modified
13945 instance.disk_template = constants.DT_DRBD8
13946 instance.disks = new_disks
13947 self.cfg.Update(instance, feedback_fn)
13949 # Release node locks while waiting for sync
13950 _ReleaseLocks(self, locking.LEVEL_NODE)
13952 # disks are created, waiting for sync
13953 disk_abort = not _WaitForSync(self, instance,
13954 oneshot=not self.op.wait_for_sync)
13956 raise errors.OpExecError("There are some degraded disks for"
13957 " this instance, please cleanup manually")
13959 # Node resource locks will be released by caller
13961 def _ConvertDrbdToPlain(self, feedback_fn):
13962 """Converts an instance from drbd to plain.
13965 instance = self.instance
13967 assert len(instance.secondary_nodes) == 1
13968 assert instance.disk_template == constants.DT_DRBD8
13970 pnode = instance.primary_node
13971 snode = instance.secondary_nodes[0]
13972 feedback_fn("Converting template to plain")
13974 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13975 new_disks = [d.children[0] for d in instance.disks]
13977 # copy over size and mode
13978 for parent, child in zip(old_disks, new_disks):
13979 child.size = parent.size
13980 child.mode = parent.mode
13982 # this is a DRBD disk, return its port to the pool
13983 # NOTE: this must be done right before the call to cfg.Update!
13984 for disk in old_disks:
13985 tcp_port = disk.logical_id[2]
13986 self.cfg.AddTcpUdpPort(tcp_port)
13988 # update instance structure
13989 instance.disks = new_disks
13990 instance.disk_template = constants.DT_PLAIN
13991 self.cfg.Update(instance, feedback_fn)
13993 # Release locks in case removing disks takes a while
13994 _ReleaseLocks(self, locking.LEVEL_NODE)
13996 feedback_fn("Removing volumes on the secondary node...")
13997 for disk in old_disks:
13998 self.cfg.SetDiskID(disk, snode)
13999 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14001 self.LogWarning("Could not remove block device %s on node %s,"
14002 " continuing anyway: %s", disk.iv_name, snode, msg)
14004 feedback_fn("Removing unneeded volumes on the primary node...")
14005 for idx, disk in enumerate(old_disks):
14006 meta = disk.children[1]
14007 self.cfg.SetDiskID(meta, pnode)
14008 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14010 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14011 " continuing anyway: %s", idx, pnode, msg)
14013 def _CreateNewDisk(self, idx, params, _):
14014 """Creates a new disk.
14017 instance = self.instance
14020 if instance.disk_template in constants.DTS_FILEBASED:
14021 (file_driver, file_path) = instance.disks[0].logical_id
14022 file_path = os.path.dirname(file_path)
14023 else:
14024 file_driver = file_path = None
14027 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14028 instance.primary_node, instance.secondary_nodes,
14029 [params], file_path, file_driver, idx,
14030 self.Log, self.diskparams)[0]
14032 info = _GetInstanceInfoText(instance)
14034 logging.info("Creating volume %s for instance %s",
14035 disk.iv_name, instance.name)
14036 # Note: this needs to be kept in sync with _CreateDisks
14038 for node in instance.all_nodes:
14039 f_create = (node == instance.primary_node)
14041 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14042 except errors.OpExecError, err:
14043 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14044 disk.iv_name, disk, node, err)
14047 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14051 def _ModifyDisk(idx, disk, params, _):
14052 """Modifies a disk.
14055 disk.mode = params[constants.IDISK_MODE]
14058 ("disk.mode/%d" % idx, disk.mode),
14061 def _RemoveDisk(self, idx, root, _):
14065 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14066 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14067 self.cfg.SetDiskID(disk, node)
14068 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14070 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14071 " continuing anyway", idx, node, msg)
14073 # if this is a DRBD disk, return its port to the pool
14074 if root.dev_type in constants.LDS_DRBD:
14075 self.cfg.AddTcpUdpPort(root.logical_id[2])
14078 def _CreateNewNic(idx, params, private):
14079 """Creates data structure for a new network interface.
14082 mac = params[constants.INIC_MAC]
14083 ip = params.get(constants.INIC_IP, None)
14084 net = params.get(constants.INIC_NETWORK, None)
14085 #TODO: not private.filled?? can a nic have no nicparams??
14086 nicparams = private.filled
14088 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
14090 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14091 (mac, ip, private.filled[constants.NIC_MODE],
14092 private.filled[constants.NIC_LINK],
14097 def _ApplyNicMods(idx, nic, params, private):
14098 """Modifies a network interface.
14103 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
14105 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14106 setattr(nic, key, params[key])
14109 nic.nicparams = private.filled
14111 for (key, val) in nic.nicparams.items():
14112 changes.append(("nic.%s/%d" % (key, idx), val))
14116 def Exec(self, feedback_fn):
14117 """Modifies an instance.
14119 All parameters take effect only at the next restart of the instance.
14122 # Process here the warnings from CheckPrereq, as we don't have a
14123 # feedback_fn there.
14124 # TODO: Replace with self.LogWarning
14125 for warn in self.warn:
14126 feedback_fn("WARNING: %s" % warn)
14128 assert ((self.op.disk_template is None) ^
14129 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14130 "Not owning any node resource locks"
14133 instance = self.instance
14136 if self.op.runtime_mem:
14137 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14139 self.op.runtime_mem)
14140 rpcres.Raise("Cannot modify instance runtime memory")
14141 result.append(("runtime_memory", self.op.runtime_mem))
14143 # Apply disk changes
14144 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14145 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14146 _UpdateIvNames(0, instance.disks)
14148 if self.op.disk_template:
14150 check_nodes = set(instance.all_nodes)
14151 if self.op.remote_node:
14152 check_nodes.add(self.op.remote_node)
14153 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14154 owned = self.owned_locks(level)
14155 assert not (check_nodes - owned), \
14156 ("Not owning the correct locks, owning %r, expected at least %r" %
14157 (owned, check_nodes))
14159 r_shut = _ShutdownInstanceDisks(self, instance)
14161 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14162 " proceed with disk template conversion")
14163 mode = (instance.disk_template, self.op.disk_template)
14165 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14167 self.cfg.ReleaseDRBDMinors(instance.name)
14169 result.append(("disk_template", self.op.disk_template))
14171 assert instance.disk_template == self.op.disk_template, \
14172 ("Expected disk template '%s', found '%s'" %
14173 (self.op.disk_template, instance.disk_template))
14175 # Release node and resource locks if there are any (they might already have
14176 # been released during disk conversion)
14177 _ReleaseLocks(self, locking.LEVEL_NODE)
14178 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14180 # Apply NIC changes
14181 if self._new_nics is not None:
14182 instance.nics = self._new_nics
14183 result.extend(self._nic_chgdesc)
14186 if self.op.hvparams:
14187 instance.hvparams = self.hv_inst
14188 for key, val in self.op.hvparams.iteritems():
14189 result.append(("hv/%s" % key, val))
14192 if self.op.beparams:
14193 instance.beparams = self.be_inst
14194 for key, val in self.op.beparams.iteritems():
14195 result.append(("be/%s" % key, val))
14198 if self.op.os_name:
14199 instance.os = self.op.os_name
14202 if self.op.osparams:
14203 instance.osparams = self.os_inst
14204 for key, val in self.op.osparams.iteritems():
14205 result.append(("os/%s" % key, val))
14207 if self.op.offline is None:
14210 elif self.op.offline:
14211 # Mark instance as offline
14212 self.cfg.MarkInstanceOffline(instance.name)
14213 result.append(("admin_state", constants.ADMINST_OFFLINE))
14215 # Mark instance as online, but stopped
14216 self.cfg.MarkInstanceDown(instance.name)
14217 result.append(("admin_state", constants.ADMINST_DOWN))
14219 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14221 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14222 self.owned_locks(locking.LEVEL_NODE)), \
14223 "All node locks should have been released by now"
14227 _DISK_CONVERSIONS = {
14228 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14229 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14233 class LUInstanceChangeGroup(LogicalUnit):
14234 HPATH = "instance-change-group"
14235 HTYPE = constants.HTYPE_INSTANCE
14238 def ExpandNames(self):
14239 self.share_locks = _ShareAll()
14241 self.needed_locks = {
14242 locking.LEVEL_NODEGROUP: [],
14243 locking.LEVEL_NODE: [],
14244 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14247 self._ExpandAndLockInstance()
14249 if self.op.target_groups:
14250 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14251 self.op.target_groups)
14253 self.req_target_uuids = None
14255 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14257 def DeclareLocks(self, level):
14258 if level == locking.LEVEL_NODEGROUP:
14259 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14261 if self.req_target_uuids:
14262 lock_groups = set(self.req_target_uuids)
14264 # Lock all groups used by instance optimistically; this requires going
14265 # via the node before it's locked, requiring verification later on
14266 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14267 lock_groups.update(instance_groups)
14269 # No target groups, need to lock all of them
14270 lock_groups = locking.ALL_SET
14272 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14274 elif level == locking.LEVEL_NODE:
14275 if self.req_target_uuids:
14276 # Lock all nodes used by instances
14277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14278 self._LockInstancesNodes()
14280 # Lock all nodes in all potential target groups
14281 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14282 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14283 member_nodes = [node_name
14284 for group in lock_groups
14285 for node_name in self.cfg.GetNodeGroup(group).members]
14286 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14288 # Lock all nodes as all groups are potential targets
14289 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14291 def CheckPrereq(self):
14292 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14293 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14294 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14296 assert (self.req_target_uuids is None or
14297 owned_groups.issuperset(self.req_target_uuids))
14298 assert owned_instances == set([self.op.instance_name])
14300 # Get instance information
14301 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14303 # Check if node groups for locked instance are still correct
14304 assert owned_nodes.issuperset(self.instance.all_nodes), \
14305 ("Instance %s's nodes changed while we kept the lock" %
14306 self.op.instance_name)
14308 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14311 if self.req_target_uuids:
14312 # User requested specific target groups
14313 self.target_uuids = frozenset(self.req_target_uuids)
14315 # All groups except those used by the instance are potential targets
14316 self.target_uuids = owned_groups - inst_groups
14318 conflicting_groups = self.target_uuids & inst_groups
14319 if conflicting_groups:
14320 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14321 " used by the instance '%s'" %
14322 (utils.CommaJoin(conflicting_groups),
14323 self.op.instance_name),
14324 errors.ECODE_INVAL)
14326 if not self.target_uuids:
14327 raise errors.OpPrereqError("There are no possible target groups",
14328 errors.ECODE_INVAL)
14330 def BuildHooksEnv(self):
14331 """Build hooks env.
14334 assert self.target_uuids
14337 "TARGET_GROUPS": " ".join(self.target_uuids),
14340 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14344 def BuildHooksNodes(self):
14345 """Build hooks nodes.
14348 mn = self.cfg.GetMasterNode()
14349 return ([mn], [mn])
14351 def Exec(self, feedback_fn):
14352 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14354 assert instances == [self.op.instance_name], "Instance not locked"
14356 req = iallocator.IAReqGroupChange(instances=instances,
14357 target_groups=list(self.target_uuids))
14358 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14360 ial.Run(self.op.iallocator)
14362 if not ial.success:
14363 raise errors.OpPrereqError("Can't compute solution for changing group of"
14364 " instance '%s' using iallocator '%s': %s" %
14365 (self.op.instance_name, self.op.iallocator,
14366 ial.info), errors.ECODE_NORES)
14368 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14370 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14371 " instance '%s'", len(jobs), self.op.instance_name)
14373 return ResultWithJobs(jobs)
14376 class LUBackupQuery(NoHooksLU):
14377 """Query the exports list
14382 def CheckArguments(self):
14383 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14384 ["node", "export"], self.op.use_locking)
14386 def ExpandNames(self):
14387 self.expq.ExpandNames(self)
14389 def DeclareLocks(self, level):
14390 self.expq.DeclareLocks(self, level)
14392 def Exec(self, feedback_fn):
14395 for (node, expname) in self.expq.OldStyleQuery(self):
14396 if expname is None:
14397 result[node] = False
14399 result.setdefault(node, []).append(expname)
14404 class _ExportQuery(_QueryBase):
14405 FIELDS = query.EXPORT_FIELDS
14407 #: The node name is not a unique key for this query
14408 SORT_FIELD = "node"
14410 def ExpandNames(self, lu):
14411 lu.needed_locks = {}
14413 # The following variables interact with _QueryBase._GetNames
14415 self.wanted = _GetWantedNodes(lu, self.names)
14417 self.wanted = locking.ALL_SET
14419 self.do_locking = self.use_locking
14421 if self.do_locking:
14422 lu.share_locks = _ShareAll()
14423 lu.needed_locks = {
14424 locking.LEVEL_NODE: self.wanted,
14428 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14430 def DeclareLocks(self, lu, level):
14433 def _GetQueryData(self, lu):
14434 """Computes the list of nodes and their attributes.
14437 # Locking is not used
14439 assert not (compat.any(lu.glm.is_owned(level)
14440 for level in locking.LEVELS
14441 if level != locking.LEVEL_CLUSTER) or
14442 self.do_locking or self.use_locking)
14444 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14448 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14450 result.append((node, None))
14452 result.extend((node, expname) for expname in nres.payload)
14457 class LUBackupPrepare(NoHooksLU):
14458 """Prepares an instance for an export and returns useful information.
14463 def ExpandNames(self):
14464 self._ExpandAndLockInstance()
14466 def CheckPrereq(self):
14467 """Check prerequisites.
14470 instance_name = self.op.instance_name
14472 self.instance = self.cfg.GetInstanceInfo(instance_name)
14473 assert self.instance is not None, \
14474 "Cannot retrieve locked instance %s" % self.op.instance_name
14475 _CheckNodeOnline(self, self.instance.primary_node)
14477 self._cds = _GetClusterDomainSecret()
14479 def Exec(self, feedback_fn):
14480 """Prepares an instance for an export.
14483 instance = self.instance
14485 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14486 salt = utils.GenerateSecret(8)
14488 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14489 result = self.rpc.call_x509_cert_create(instance.primary_node,
14490 constants.RIE_CERT_VALIDITY)
14491 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14493 (name, cert_pem) = result.payload
14495 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14496 cert_pem)
14498 return {
14499 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14500 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14501 salt),
14502 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14503 }
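# Descriptive note (added): the dictionary returned above is intended to be
# handed back by the client when it submits the actual remote export;
# LUBackupExport.CheckPrereq below re-verifies the HMAC over the X509 key name
# against the cluster domain secret before trusting it.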
14508 class LUBackupExport(LogicalUnit):
14509 """Export an instance to an image in the cluster.
14512 HPATH = "instance-export"
14513 HTYPE = constants.HTYPE_INSTANCE
14516 def CheckArguments(self):
14517 """Check the arguments.
14520 self.x509_key_name = self.op.x509_key_name
14521 self.dest_x509_ca_pem = self.op.destination_x509_ca
14523 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14524 if not self.x509_key_name:
14525 raise errors.OpPrereqError("Missing X509 key name for encryption",
14526 errors.ECODE_INVAL)
14528 if not self.dest_x509_ca_pem:
14529 raise errors.OpPrereqError("Missing destination X509 CA",
14530 errors.ECODE_INVAL)
14532 def ExpandNames(self):
14533 self._ExpandAndLockInstance()
14535 # Lock all nodes for local exports
14536 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14537 # FIXME: lock only instance primary and destination node
14539 # Sad but true, for now we have to lock all nodes, as we don't know where
14540 # the previous export might be, and in this LU we search for it and
14541 # remove it from its current node. In the future we could fix this by:
14542 # - making a tasklet to search (share-lock all), then create the
14543 # new one, then a tasklet to remove the old one afterwards
14544 # - removing the removal operation altogether
14545 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14547 # Allocations should be stopped while this LU runs with node locks, but
14548 # it doesn't have to be exclusive
14549 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14550 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14552 def DeclareLocks(self, level):
14553 """Last minute lock declaration."""
14554 # All nodes are locked anyway, so nothing to do here.
14556 def BuildHooksEnv(self):
14557 """Build hooks env.
14559 This will run on the master, primary node and target node.
14563 "EXPORT_MODE": self.op.mode,
14564 "EXPORT_NODE": self.op.target_node,
14565 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14566 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14567 # TODO: Generic function for boolean env variables
14568 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14571 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14575 def BuildHooksNodes(self):
14576 """Build hooks nodes.
14579 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14581 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14582 nl.append(self.op.target_node)
14584 return (nl, nl)
14586 def CheckPrereq(self):
14587 """Check prerequisites.
14589 This checks that the instance and node names are valid.
14592 instance_name = self.op.instance_name
14594 self.instance = self.cfg.GetInstanceInfo(instance_name)
14595 assert self.instance is not None, \
14596 "Cannot retrieve locked instance %s" % self.op.instance_name
14597 _CheckNodeOnline(self, self.instance.primary_node)
14599 if (self.op.remove_instance and
14600 self.instance.admin_state == constants.ADMINST_UP and
14601 not self.op.shutdown):
14602 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14603 " down first", errors.ECODE_STATE)
14605 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14606 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14607 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14608 assert self.dst_node is not None
14610 _CheckNodeOnline(self, self.dst_node.name)
14611 _CheckNodeNotDrained(self, self.dst_node.name)
14614 self.dest_disk_info = None
14615 self.dest_x509_ca = None
14617 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14618 self.dst_node = None
14620 if len(self.op.target_node) != len(self.instance.disks):
14621 raise errors.OpPrereqError(("Received destination information for %s"
14622 " disks, but instance %s has %s disks") %
14623 (len(self.op.target_node), instance_name,
14624 len(self.instance.disks)),
14625 errors.ECODE_INVAL)
14627 cds = _GetClusterDomainSecret()
14629 # Check X509 key name
14630 try:
14631 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14632 except (TypeError, ValueError), err:
14633 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14634 errors.ECODE_INVAL)
14636 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14637 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14638 errors.ECODE_INVAL)
14640 # Load and verify CA
14641 try:
14642 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14643 except OpenSSL.crypto.Error, err:
14644 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14645 (err, ), errors.ECODE_INVAL)
14647 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14648 if errcode is not None:
14649 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14650 (msg, ), errors.ECODE_INVAL)
14652 self.dest_x509_ca = cert
14654 # Verify target information
14655 disk_info = []
14656 for idx, disk_data in enumerate(self.op.target_node):
14657 try:
14658 (host, port, magic) = \
14659 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14660 except errors.GenericError, err:
14661 raise errors.OpPrereqError("Target info for disk %s: %s" %
14662 (idx, err), errors.ECODE_INVAL)
14664 disk_info.append((host, port, magic))
14666 assert len(disk_info) == len(self.op.target_node)
14667 self.dest_disk_info = disk_info
14669 else:
14670 raise errors.ProgrammerError("Unhandled export mode %r" %
14671 self.op.mode)
14673 # instance disk type verification
14674 # TODO: Implement export support for file-based disks
14675 for disk in self.instance.disks:
14676 if disk.dev_type == constants.LD_FILE:
14677 raise errors.OpPrereqError("Export not supported for instances with"
14678 " file-based disks", errors.ECODE_INVAL)
14680 def _CleanupExports(self, feedback_fn):
14681 """Removes exports of current instance from all other nodes.
14683 If an instance in a cluster with nodes A..D was exported to node C, its
14684 exports will be removed from the nodes A, B and D.
14687 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14689 nodelist = self.cfg.GetNodeList()
14690 nodelist.remove(self.dst_node.name)
14692 # on one-node clusters nodelist will be empty after the removal
14693 # if we proceed the backup would be removed because OpBackupQuery
14694 # substitutes an empty list with the full cluster node list.
14695 iname = self.instance.name
14696 if nodelist:
14697 feedback_fn("Removing old exports for instance %s" % iname)
14698 exportlist = self.rpc.call_export_list(nodelist)
14699 for node in exportlist:
14700 if exportlist[node].fail_msg:
14701 continue
14702 if iname in exportlist[node].payload:
14703 msg = self.rpc.call_export_remove(node, iname).fail_msg
14704 if msg:
14705 self.LogWarning("Could not remove older export for instance %s"
14706 " on node %s: %s", iname, node, msg)
14708 def Exec(self, feedback_fn):
14709 """Export an instance to an image in the cluster.
14712 assert self.op.mode in constants.EXPORT_MODES
14714 instance = self.instance
14715 src_node = instance.primary_node
14717 if self.op.shutdown:
14718 # shutdown the instance, but not the disks
14719 feedback_fn("Shutting down instance %s" % instance.name)
14720 result = self.rpc.call_instance_shutdown(src_node, instance,
14721 self.op.shutdown_timeout)
14722 # TODO: Maybe ignore failures if ignore_remove_failures is set
14723 result.Raise("Could not shut down instance %s on"
14724 " node %s" % (instance.name, src_node))
14726 # set the disks ID correctly since call_instance_start needs the
14727 # correct drbd minor to create the symlinks
14728 for disk in instance.disks:
14729 self.cfg.SetDiskID(disk, src_node)
14731 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14733 if activate_disks:
14734 # Activate the instance disks if we're exporting a stopped instance
14735 feedback_fn("Activating disks for %s" % instance.name)
14736 _StartInstanceDisks(self, instance, None)
14739 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14740 instance)
14742 helper.CreateSnapshots()
14744 if (self.op.shutdown and
14745 instance.admin_state == constants.ADMINST_UP and
14746 not self.op.remove_instance):
14747 assert not activate_disks
14748 feedback_fn("Starting instance %s" % instance.name)
14749 result = self.rpc.call_instance_start(src_node,
14750 (instance, None, None), False)
14751 msg = result.fail_msg
14752 if msg:
14753 feedback_fn("Failed to start instance: %s" % msg)
14754 _ShutdownInstanceDisks(self, instance)
14755 raise errors.OpExecError("Could not start instance: %s" % msg)
14757 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14758 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14759 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14760 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14761 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14763 (key_name, _, _) = self.x509_key_name
14765 dest_ca_pem = \
14766 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14767 self.dest_x509_ca)
14769 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14770 key_name, dest_ca_pem,
14771 timeouts)
14775 # Check for backwards compatibility
14776 assert len(dresults) == len(instance.disks)
14777 assert compat.all(isinstance(i, bool) for i in dresults), \
14778 "Not all results are boolean: %r" % dresults
14781 if activate_disks:
14782 feedback_fn("Deactivating disks for %s" % instance.name)
14783 _ShutdownInstanceDisks(self, instance)
14785 if not (compat.all(dresults) and fin_resu):
14786 failures = []
14787 if not fin_resu:
14788 failures.append("export finalization")
14789 if not compat.all(dresults):
14790 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14791 if not dsk)
14792 failures.append("disk export: disk(s) %s" % fdsk)
14794 raise errors.OpExecError("Export failed, errors in %s" %
14795 utils.CommaJoin(failures))
14797 # At this point, the export was successful, we can cleanup/finish
14799 # Remove instance if requested
14800 if self.op.remove_instance:
14801 feedback_fn("Removing instance %s" % instance.name)
14802 _RemoveInstance(self, feedback_fn, instance,
14803 self.op.ignore_remove_failures)
14805 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14806 self._CleanupExports(feedback_fn)
14808 return fin_resu, dresults
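# Descriptive note (added): the LU result is the (fin_resu, dresults) tuple
# computed above, where fin_resu is the boolean outcome of finalizing the
# export and dresults holds one boolean per instance disk (matching
# instance.disks, as the assertions above require), each indicating whether
# that disk was exported successfully.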
14811 class LUBackupRemove(NoHooksLU):
14812 """Remove exports related to the named instance.
14817 def ExpandNames(self):
14818 self.needed_locks = {
14819 # We need all nodes to be locked in order for RemoveExport to work, but
14820 # we don't need to lock the instance itself, as nothing will happen to it
14821 # (and we can remove exports also for a removed instance)
14822 locking.LEVEL_NODE: locking.ALL_SET,
14824 # Removing backups is quick, so blocking allocations is justified
14825 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14828 # Allocations should be stopped while this LU runs with node locks, but it
14829 # doesn't have to be exclusive
14830 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14832 def Exec(self, feedback_fn):
14833 """Remove any export.
14836 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14837 # If the instance was not found we'll try with the name that was passed in.
14838 # This will only work if it was an FQDN, though.
14839 fqdn_warn = False
14840 if not instance_name:
14841 fqdn_warn = True
14842 instance_name = self.op.instance_name
14844 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14845 exportlist = self.rpc.call_export_list(locked_nodes)
14846 found = False
14847 for node in exportlist:
14848 msg = exportlist[node].fail_msg
14849 if msg:
14850 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14851 continue
14852 if instance_name in exportlist[node].payload:
14853 found = True
14854 result = self.rpc.call_export_remove(node, instance_name)
14855 msg = result.fail_msg
14856 if msg:
14857 logging.error("Could not remove export for instance %s"
14858 " on node %s: %s", instance_name, node, msg)
14860 if fqdn_warn and not found:
14861 feedback_fn("Export not found. If trying to remove an export belonging"
14862 " to a deleted instance please use its Fully Qualified"
14863 " Domain Name.")
14866 class LUGroupAdd(LogicalUnit):
14867 """Logical unit for creating node groups.
14870 HPATH = "group-add"
14871 HTYPE = constants.HTYPE_GROUP
14874 def ExpandNames(self):
14875 # We need the new group's UUID here so that we can create and acquire the
14876 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14877 # that it should not check whether the UUID exists in the configuration.
14878 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14879 self.needed_locks = {}
14880 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14882 def CheckPrereq(self):
14883 """Check prerequisites.
14885 This checks that the given group name is not an existing node group
14889 try:
14890 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14891 except errors.OpPrereqError:
14892 pass
14893 else:
14894 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14895 " node group (UUID: %s)" %
14896 (self.op.group_name, existing_uuid),
14897 errors.ECODE_EXISTS)
14899 if self.op.ndparams:
14900 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14902 if self.op.hv_state:
14903 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14905 self.new_hv_state = None
14907 if self.op.disk_state:
14908 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14910 self.new_disk_state = None
14912 if self.op.diskparams:
14913 for templ in constants.DISK_TEMPLATES:
14914 if templ in self.op.diskparams:
14915 utils.ForceDictType(self.op.diskparams[templ],
14916 constants.DISK_DT_TYPES)
14917 self.new_diskparams = self.op.diskparams
14919 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14920 except errors.OpPrereqError, err:
14921 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14922 errors.ECODE_INVAL)
14924 self.new_diskparams = {}
14926 if self.op.ipolicy:
14927 cluster = self.cfg.GetClusterInfo()
14928 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14930 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14931 except errors.ConfigurationError, err:
14932 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14933 errors.ECODE_INVAL)
14935 def BuildHooksEnv(self):
14936 """Build hooks env.
14940 "GROUP_NAME": self.op.group_name,
14943 def BuildHooksNodes(self):
14944 """Build hooks nodes.
14947 mn = self.cfg.GetMasterNode()
14948 return ([mn], [mn])
14950 def Exec(self, feedback_fn):
14951 """Add the node group to the cluster.
14954 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14955 uuid=self.group_uuid,
14956 alloc_policy=self.op.alloc_policy,
14957 ndparams=self.op.ndparams,
14958 diskparams=self.new_diskparams,
14959 ipolicy=self.op.ipolicy,
14960 hv_state_static=self.new_hv_state,
14961 disk_state_static=self.new_disk_state)
14963 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14964 del self.remove_locks[locking.LEVEL_NODEGROUP]
14967 class LUGroupAssignNodes(NoHooksLU):
14968 """Logical unit for assigning nodes to groups.
14973 def ExpandNames(self):
14974 # These raise errors.OpPrereqError on their own:
14975 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14976 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14978 # We want to lock all the affected nodes and groups. We have readily
14979 # available the list of nodes, and the *destination* group. To gather the
14980 # list of "source" groups, we need to fetch node information later on.
14981 self.needed_locks = {
14982 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14983 locking.LEVEL_NODE: self.op.nodes,
14986 def DeclareLocks(self, level):
14987 if level == locking.LEVEL_NODEGROUP:
14988 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14990 # Try to get all affected nodes' groups without having the group or node
14991 # lock yet. Needs verification later in the code flow.
14992 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14994 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14996 def CheckPrereq(self):
14997 """Check prerequisites.
15000 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15001 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15002 frozenset(self.op.nodes))
15004 expected_locks = (set([self.group_uuid]) |
15005 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15006 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15007 if actual_locks != expected_locks:
15008 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15009 " current groups are '%s', used to be '%s'" %
15010 (utils.CommaJoin(expected_locks),
15011 utils.CommaJoin(actual_locks)))
15013 self.node_data = self.cfg.GetAllNodesInfo()
15014 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15015 instance_data = self.cfg.GetAllInstancesInfo()
15017 if self.group is None:
15018 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15019 (self.op.group_name, self.group_uuid))
15021 (new_splits, previous_splits) = \
15022 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15023 for node in self.op.nodes],
15024 self.node_data, instance_data)
15027 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15029 if not self.op.force:
15030 raise errors.OpExecError("The following instances get split by this"
15031 " change and --force was not given: %s" %
15034 self.LogWarning("This operation will split the following instances: %s",
15037 if previous_splits:
15038 self.LogWarning("In addition, these already-split instances continue"
15039 " to be split across groups: %s",
15040 utils.CommaJoin(utils.NiceSort(previous_splits)))
15042 def Exec(self, feedback_fn):
15043 """Assign nodes to a new group.
15046 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15048 self.cfg.AssignGroupNodes(mods)
15050 @staticmethod
15051 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15052 """Check for split instances after a node assignment.
15054 This method considers a series of node assignments as an atomic operation,
15055 and returns information about split instances after applying the set of
15058 In particular, it returns information about newly split instances, and
15059 instances that were already split, and remain so after the change.
15061 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15064 @type changes: list of (node_name, new_group_uuid) pairs.
15065 @param changes: list of node assignments to consider.
15066 @param node_data: a dict with data for all nodes
15067 @param instance_data: a dict with all instances to consider
15068 @rtype: a two-tuple
15069 @return: a list of instances that were previously okay and result split as a
15070 consequence of this change, and a list of instances that were previously
15071 split and this change does not fix.
15074 changed_nodes = dict((node, group) for node, group in changes
15075 if node_data[node].group != group)
15077 all_split_instances = set()
15078 previously_split_instances = set()
15080 def InstanceNodes(instance):
15081 return [instance.primary_node] + list(instance.secondary_nodes)
15083 for inst in instance_data.values():
15084 if inst.disk_template not in constants.DTS_INT_MIRROR:
15087 instance_nodes = InstanceNodes(inst)
15089 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15090 previously_split_instances.add(inst.name)
15092 if len(set(changed_nodes.get(node, node_data[node].group)
15093 for node in instance_nodes)) > 1:
15094 all_split_instances.add(inst.name)
15096 return (list(all_split_instances - previously_split_instances),
15097 list(previously_split_instances & all_split_instances))
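# Illustrative example (added; node and group names invented): assume node n1
# is in group "g1" and node n2 in group "g2".  A DRBD instance with primary n1
# and secondary n2 spans two groups, so it would appear in the second returned
# list if it was already split before the proposed changes, or in the first
# list if the proposed changes are what split it.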
15100 class _GroupQuery(_QueryBase):
15101 FIELDS = query.GROUP_FIELDS
15103 def ExpandNames(self, lu):
15104 lu.needed_locks = {}
15106 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15107 self._cluster = lu.cfg.GetClusterInfo()
15108 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15111 self.wanted = [name_to_uuid[name]
15112 for name in utils.NiceSort(name_to_uuid.keys())]
15114 # Accept names to be either names or UUIDs.
15117 all_uuid = frozenset(self._all_groups.keys())
15119 for name in self.names:
15120 if name in all_uuid:
15121 self.wanted.append(name)
15122 elif name in name_to_uuid:
15123 self.wanted.append(name_to_uuid[name])
15125 missing.append(name)
15128 raise errors.OpPrereqError("Some groups do not exist: %s" %
15129 utils.CommaJoin(missing),
15130 errors.ECODE_NOENT)
15132 def DeclareLocks(self, lu, level):
15135 def _GetQueryData(self, lu):
15136 """Computes the list of node groups and their attributes.
15139 do_nodes = query.GQ_NODE in self.requested_data
15140 do_instances = query.GQ_INST in self.requested_data
15142 group_to_nodes = None
15143 group_to_instances = None
15145 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15146 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15147 # latter GetAllInstancesInfo() is not enough, for we have to go through
15148 # instance->node. Hence, we will need to process nodes even if we only need
15149 # instance information.
15150 if do_nodes or do_instances:
15151 all_nodes = lu.cfg.GetAllNodesInfo()
15152 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15155 for node in all_nodes.values():
15156 if node.group in group_to_nodes:
15157 group_to_nodes[node.group].append(node.name)
15158 node_to_group[node.name] = node.group
15161 all_instances = lu.cfg.GetAllInstancesInfo()
15162 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15164 for instance in all_instances.values():
15165 node = instance.primary_node
15166 if node in node_to_group:
15167 group_to_instances[node_to_group[node]].append(instance.name)
15170 # Do not pass on node information if it was not requested.
15171 group_to_nodes = None
15173 return query.GroupQueryData(self._cluster,
15174 [self._all_groups[uuid]
15175 for uuid in self.wanted],
15176 group_to_nodes, group_to_instances,
15177 query.GQ_DISKPARAMS in self.requested_data)
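# Descriptive note (added): when node or instance data is requested, the
# mappings built above look roughly like (illustrative values):
#   group_to_nodes     = {"<group uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group uuid>": ["inst1"]}
# keyed by the UUIDs in self.wanted; instances are attributed to the group of
# their primary node.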
15180 class LUGroupQuery(NoHooksLU):
15181 """Logical unit for querying node groups.
15186 def CheckArguments(self):
15187 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15188 self.op.output_fields, False)
15190 def ExpandNames(self):
15191 self.gq.ExpandNames(self)
15193 def DeclareLocks(self, level):
15194 self.gq.DeclareLocks(self, level)
15196 def Exec(self, feedback_fn):
15197 return self.gq.OldStyleQuery(self)
15200 class LUGroupSetParams(LogicalUnit):
15201 """Modifies the parameters of a node group.
15204 HPATH = "group-modify"
15205 HTYPE = constants.HTYPE_GROUP
15208 def CheckArguments(self):
15211 self.op.diskparams,
15212 self.op.alloc_policy,
15214 self.op.disk_state,
15218 if all_changes.count(None) == len(all_changes):
15219 raise errors.OpPrereqError("Please pass at least one modification",
15220 errors.ECODE_INVAL)
15222 def ExpandNames(self):
15223 # This raises errors.OpPrereqError on its own:
15224 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15226 self.needed_locks = {
15227 locking.LEVEL_INSTANCE: [],
15228 locking.LEVEL_NODEGROUP: [self.group_uuid],
15231 self.share_locks[locking.LEVEL_INSTANCE] = 1
15233 def DeclareLocks(self, level):
15234 if level == locking.LEVEL_INSTANCE:
15235 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15237 # Lock instances optimistically, needs verification once group lock has
15239 self.needed_locks[locking.LEVEL_INSTANCE] = \
15240 self.cfg.GetNodeGroupInstances(self.group_uuid)
15243 def _UpdateAndVerifyDiskParams(old, new):
15244 """Updates and verifies disk parameters.
15247 new_params = _GetUpdatedParams(old, new)
15248 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15249 return new_params
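# Illustrative sketch (added; the parameter name is an example only): a call
# like _UpdateAndVerifyDiskParams({"resync-rate": 1000}, {"resync-rate": 2000})
# lays the new values over the old ones and type-checks the merged dict
# against constants.DISK_DT_TYPES before returning it.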
15251 def CheckPrereq(self):
15252 """Check prerequisites.
15255 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15257 # Check if locked instances are still correct
15258 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15260 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15261 cluster = self.cfg.GetClusterInfo()
15263 if self.group is None:
15264 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15265 (self.op.group_name, self.group_uuid))
15267 if self.op.ndparams:
15268 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15269 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15270 self.new_ndparams = new_ndparams
15272 if self.op.diskparams:
15273 diskparams = self.group.diskparams
15274 uavdp = self._UpdateAndVerifyDiskParams
15275 # For each disktemplate subdict update and verify the values
15276 new_diskparams = dict((dt,
15277 uavdp(diskparams.get(dt, {}),
15278 self.op.diskparams[dt]))
15279 for dt in constants.DISK_TEMPLATES
15280 if dt in self.op.diskparams)
15281 # Now that all subdicts of diskparams are ready, let's merge the
15282 # updated subdicts into the actual dict
15283 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15285 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15286 except errors.OpPrereqError, err:
15287 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15288 errors.ECODE_INVAL)
15290 if self.op.hv_state:
15291 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15292 self.group.hv_state_static)
15294 if self.op.disk_state:
15295 self.new_disk_state = \
15296 _MergeAndVerifyDiskState(self.op.disk_state,
15297 self.group.disk_state_static)
15299 if self.op.ipolicy:
15300 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15304 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15305 inst_filter = lambda inst: inst.name in owned_instances
15306 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15307 gmi = ganeti.masterd.instance
15309 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15311 new_ipolicy, instances)
15314 self.LogWarning("After the ipolicy change the following instances"
15315 " violate them: %s",
15316 utils.CommaJoin(violations))
15318 def BuildHooksEnv(self):
15319 """Build hooks env.
15323 "GROUP_NAME": self.op.group_name,
15324 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15327 def BuildHooksNodes(self):
15328 """Build hooks nodes.
15331 mn = self.cfg.GetMasterNode()
15332 return ([mn], [mn])
15334 def Exec(self, feedback_fn):
15335 """Modifies the node group.
15338 result = []
15340 if self.op.ndparams:
15341 self.group.ndparams = self.new_ndparams
15342 result.append(("ndparams", str(self.group.ndparams)))
15344 if self.op.diskparams:
15345 self.group.diskparams = self.new_diskparams
15346 result.append(("diskparams", str(self.group.diskparams)))
15348 if self.op.alloc_policy:
15349 self.group.alloc_policy = self.op.alloc_policy
15351 if self.op.hv_state:
15352 self.group.hv_state_static = self.new_hv_state
15354 if self.op.disk_state:
15355 self.group.disk_state_static = self.new_disk_state
15357 if self.op.ipolicy:
15358 self.group.ipolicy = self.new_ipolicy
15360 self.cfg.Update(self.group, feedback_fn)
15362 return result
15364 class LUGroupRemove(LogicalUnit):
15365 HPATH = "group-remove"
15366 HTYPE = constants.HTYPE_GROUP
15369 def ExpandNames(self):
15370 # This will raise errors.OpPrereqError on its own:
15371 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15372 self.needed_locks = {
15373 locking.LEVEL_NODEGROUP: [self.group_uuid],
15376 def CheckPrereq(self):
15377 """Check prerequisites.
15379 This checks that the given group name exists as a node group, that it is
15380 empty (i.e., contains no nodes), and that it is not the last group of the
15381 cluster.
15384 # Verify that the group is empty.
15385 group_nodes = [node.name
15386 for node in self.cfg.GetAllNodesInfo().values()
15387 if node.group == self.group_uuid]
15389 if group_nodes:
15390 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15391 " nodes: %s" %
15392 (self.op.group_name,
15393 utils.CommaJoin(utils.NiceSort(group_nodes))),
15394 errors.ECODE_STATE)
15396 # Verify the cluster would not be left group-less.
15397 if len(self.cfg.GetNodeGroupList()) == 1:
15398 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15399 " removed" % self.op.group_name,
15400 errors.ECODE_STATE)
15402 def BuildHooksEnv(self):
15403 """Build hooks env.
15407 "GROUP_NAME": self.op.group_name,
15410 def BuildHooksNodes(self):
15411 """Build hooks nodes.
15414 mn = self.cfg.GetMasterNode()
15415 return ([mn], [mn])
15417 def Exec(self, feedback_fn):
15418 """Remove the node group.
15422 self.cfg.RemoveNodeGroup(self.group_uuid)
15423 except errors.ConfigurationError:
15424 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15425 (self.op.group_name, self.group_uuid))
15427 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15430 class LUGroupRename(LogicalUnit):
15431 HPATH = "group-rename"
15432 HTYPE = constants.HTYPE_GROUP
15435 def ExpandNames(self):
15436 # This raises errors.OpPrereqError on its own:
15437 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15439 self.needed_locks = {
15440 locking.LEVEL_NODEGROUP: [self.group_uuid],
15443 def CheckPrereq(self):
15444 """Check prerequisites.
15446 Ensures requested new name is not yet used.
15449 try:
15450 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15451 except errors.OpPrereqError:
15452 pass
15453 else:
15454 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15455 " node group (UUID: %s)" %
15456 (self.op.new_name, new_name_uuid),
15457 errors.ECODE_EXISTS)
15459 def BuildHooksEnv(self):
15460 """Build hooks env.
15464 "OLD_NAME": self.op.group_name,
15465 "NEW_NAME": self.op.new_name,
15468 def BuildHooksNodes(self):
15469 """Build hooks nodes.
15472 mn = self.cfg.GetMasterNode()
15474 all_nodes = self.cfg.GetAllNodesInfo()
15475 all_nodes.pop(mn, None)
15477 run_nodes = [mn]
15478 run_nodes.extend(node.name for node in all_nodes.values()
15479 if node.group == self.group_uuid)
15481 return (run_nodes, run_nodes)
15483 def Exec(self, feedback_fn):
15484 """Rename the node group.
15487 group = self.cfg.GetNodeGroup(self.group_uuid)
15490 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15491 (self.op.group_name, self.group_uuid))
15493 group.name = self.op.new_name
15494 self.cfg.Update(group, feedback_fn)
15496 return self.op.new_name
15499 class LUGroupEvacuate(LogicalUnit):
15500 HPATH = "group-evacuate"
15501 HTYPE = constants.HTYPE_GROUP
15504 def ExpandNames(self):
15505 # This raises errors.OpPrereqError on its own:
15506 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15508 if self.op.target_groups:
15509 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15510 self.op.target_groups)
15512 self.req_target_uuids = []
15514 if self.group_uuid in self.req_target_uuids:
15515 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15516 " as a target group (targets are %s)" %
15517 (self.group_uuid,
15518 utils.CommaJoin(self.req_target_uuids)),
15519 errors.ECODE_INVAL)
15521 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15523 self.share_locks = _ShareAll()
15524 self.needed_locks = {
15525 locking.LEVEL_INSTANCE: [],
15526 locking.LEVEL_NODEGROUP: [],
15527 locking.LEVEL_NODE: [],
15530 def DeclareLocks(self, level):
15531 if level == locking.LEVEL_INSTANCE:
15532 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15534 # Lock instances optimistically, needs verification once node and group
15535 # locks have been acquired
15536 self.needed_locks[locking.LEVEL_INSTANCE] = \
15537 self.cfg.GetNodeGroupInstances(self.group_uuid)
15539 elif level == locking.LEVEL_NODEGROUP:
15540 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15542 if self.req_target_uuids:
15543 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15545 # Lock all groups used by instances optimistically; this requires going
15546 # via the node before it's locked, requiring verification later on
15547 lock_groups.update(group_uuid
15548 for instance_name in
15549 self.owned_locks(locking.LEVEL_INSTANCE)
15551 self.cfg.GetInstanceNodeGroups(instance_name))
15553 # No target groups, need to lock all of them
15554 lock_groups = locking.ALL_SET
15556 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15558 elif level == locking.LEVEL_NODE:
15559 # This will only lock the nodes in the group to be evacuated which
15560 # contain actual instances
15561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15562 self._LockInstancesNodes()
15564 # Lock all nodes in group to be evacuated and target groups
15565 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15566 assert self.group_uuid in owned_groups
15567 member_nodes = [node_name
15568 for group in owned_groups
15569 for node_name in self.cfg.GetNodeGroup(group).members]
15570 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15572 def CheckPrereq(self):
15573 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15574 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15575 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15577 assert owned_groups.issuperset(self.req_target_uuids)
15578 assert self.group_uuid in owned_groups
15580 # Check if locked instances are still correct
15581 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15583 # Get instance information
15584 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15586 # Check if node groups for locked instances are still correct
15587 _CheckInstancesNodeGroups(self.cfg, self.instances,
15588 owned_groups, owned_nodes, self.group_uuid)
15590 if self.req_target_uuids:
15591 # User requested specific target groups
15592 self.target_uuids = self.req_target_uuids
15594 # All groups except the one to be evacuated are potential targets
15595 self.target_uuids = [group_uuid for group_uuid in owned_groups
15596 if group_uuid != self.group_uuid]
15598 if not self.target_uuids:
15599 raise errors.OpPrereqError("There are no possible target groups",
15600 errors.ECODE_INVAL)
15602 def BuildHooksEnv(self):
15603 """Build hooks env.
15607 "GROUP_NAME": self.op.group_name,
15608 "TARGET_GROUPS": " ".join(self.target_uuids),
15611 def BuildHooksNodes(self):
15612 """Build hooks nodes.
15615 mn = self.cfg.GetMasterNode()
15617 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15619 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15621 return (run_nodes, run_nodes)
15623 def Exec(self, feedback_fn):
15624 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15626 assert self.group_uuid not in self.target_uuids
15628 req = iallocator.IAReqGroupChange(instances=instances,
15629 target_groups=self.target_uuids)
15630 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15632 ial.Run(self.op.iallocator)
15634 if not ial.success:
15635 raise errors.OpPrereqError("Can't compute group evacuation using"
15636 " iallocator '%s': %s" %
15637 (self.op.iallocator, ial.info),
15638 errors.ECODE_NORES)
15640 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15642 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15643 len(jobs), self.op.group_name)
15645 return ResultWithJobs(jobs)
15648 class TagsLU(NoHooksLU): # pylint: disable=W0223
15649 """Generic tags LU.
15651 This is an abstract class which is the parent of all the other tags LUs.
15654 def ExpandNames(self):
15655 self.group_uuid = None
15656 self.needed_locks = {}
15658 if self.op.kind == constants.TAG_NODE:
15659 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15660 lock_level = locking.LEVEL_NODE
15661 lock_name = self.op.name
15662 elif self.op.kind == constants.TAG_INSTANCE:
15663 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15664 lock_level = locking.LEVEL_INSTANCE
15665 lock_name = self.op.name
15666 elif self.op.kind == constants.TAG_NODEGROUP:
15667 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15668 lock_level = locking.LEVEL_NODEGROUP
15669 lock_name = self.group_uuid
15670 elif self.op.kind == constants.TAG_NETWORK:
15671 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15672 lock_level = locking.LEVEL_NETWORK
15673 lock_name = self.network_uuid
15674 else:
15675 lock_level = None
15676 lock_name = None
15678 if lock_level and getattr(self.op, "use_locking", True):
15679 self.needed_locks[lock_level] = lock_name
15681 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15682 # not possible to acquire the BGL based on opcode parameters)
15684 def CheckPrereq(self):
15685 """Check prerequisites.
15688 if self.op.kind == constants.TAG_CLUSTER:
15689 self.target = self.cfg.GetClusterInfo()
15690 elif self.op.kind == constants.TAG_NODE:
15691 self.target = self.cfg.GetNodeInfo(self.op.name)
15692 elif self.op.kind == constants.TAG_INSTANCE:
15693 self.target = self.cfg.GetInstanceInfo(self.op.name)
15694 elif self.op.kind == constants.TAG_NODEGROUP:
15695 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15696 elif self.op.kind == constants.TAG_NETWORK:
15697 self.target = self.cfg.GetNetwork(self.network_uuid)
15699 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15700 str(self.op.kind), errors.ECODE_INVAL)
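# Descriptive note (added): after CheckPrereq, self.target is the
# configuration object whose tags the derived LUs read or modify: the cluster
# object for TAG_CLUSTER, a node object for TAG_NODE, and an instance, node
# group or network object for the remaining kinds.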
15703 class LUTagsGet(TagsLU):
15704 """Returns the tags of a given object.
15709 def ExpandNames(self):
15710 TagsLU.ExpandNames(self)
15712 # Share locks as this is only a read operation
15713 self.share_locks = _ShareAll()
15715 def Exec(self, feedback_fn):
15716 """Returns the tag list.
15719 return list(self.target.GetTags())
15722 class LUTagsSearch(NoHooksLU):
15723 """Searches the tags for a given pattern.
15728 def ExpandNames(self):
15729 self.needed_locks = {}
15731 def CheckPrereq(self):
15732 """Check prerequisites.
15734 This checks the pattern passed for validity by compiling it.
15738 self.re = re.compile(self.op.pattern)
15739 except re.error, err:
15740 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15741 (self.op.pattern, err), errors.ECODE_INVAL)
15743 def Exec(self, feedback_fn):
15744 """Returns the tag list.
15747 cfg = self.cfg
15748 tgts = [("/cluster", cfg.GetClusterInfo())]
15749 ilist = cfg.GetAllInstancesInfo().values()
15750 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15751 nlist = cfg.GetAllNodesInfo().values()
15752 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15753 tgts.extend(("/nodegroup/%s" % n.name, n)
15754 for n in cfg.GetAllNodeGroupsInfo().values())
15755 results = []
15756 for path, target in tgts:
15757 for tag in target.GetTags():
15758 if self.re.search(tag):
15759 results.append((path, tag))
15761 return results
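# Descriptive note (added): the result is a list of (path, tag) pairs, e.g.
# (illustrative values):
#   [("/instances/inst1.example.com", "webserver"), ("/cluster", "production")]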
15763 class LUTagsSet(TagsLU):
15764 """Sets a tag on a given object.
15769 def CheckPrereq(self):
15770 """Check prerequisites.
15772 This checks the type and length of the tag name and value.
15775 TagsLU.CheckPrereq(self)
15776 for tag in self.op.tags:
15777 objects.TaggableObject.ValidateTag(tag)
15779 def Exec(self, feedback_fn):
15783 try:
15784 for tag in self.op.tags:
15785 self.target.AddTag(tag)
15786 except errors.TagError, err:
15787 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15788 self.cfg.Update(self.target, feedback_fn)
15791 class LUTagsDel(TagsLU):
15792 """Delete a list of tags from a given object.
15797 def CheckPrereq(self):
15798 """Check prerequisites.
15800 This checks that we have the given tag.
15803 TagsLU.CheckPrereq(self)
15804 for tag in self.op.tags:
15805 objects.TaggableObject.ValidateTag(tag)
15806 del_tags = frozenset(self.op.tags)
15807 cur_tags = self.target.GetTags()
15809 diff_tags = del_tags - cur_tags
15810 if diff_tags:
15811 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15812 raise errors.OpPrereqError("Tag(s) %s not found" %
15813 (utils.CommaJoin(diff_names), ),
15814 errors.ECODE_NOENT)
15816 def Exec(self, feedback_fn):
15817 """Remove the tag from the object.
15820 for tag in self.op.tags:
15821 self.target.RemoveTag(tag)
15822 self.cfg.Update(self.target, feedback_fn)
15825 class LUTestDelay(NoHooksLU):
15826 """Sleep for a specified amount of time.
15828 This LU sleeps on the master and/or nodes for a specified amount of
15829 time.
15834 def ExpandNames(self):
15835 """Expand names and set required locks.
15837 This expands the node list, if any.
15840 self.needed_locks = {}
15841 if self.op.on_nodes:
15842 # _GetWantedNodes can be used here, but is not always appropriate to use
15843 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15844 # more information.
15845 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15846 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15848 def _TestDelay(self):
15849 """Do the actual sleep.
15852 if self.op.on_master:
15853 if not utils.TestDelay(self.op.duration):
15854 raise errors.OpExecError("Error during master delay test")
15855 if self.op.on_nodes:
15856 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15857 for node, node_result in result.items():
15858 node_result.Raise("Failure during rpc call to node %s" % node)
15860 def Exec(self, feedback_fn):
15861 """Execute the test delay opcode, with the wanted repetitions.
15864 if self.op.repeat == 0:
15865 self._TestDelay()
15866 else:
15867 top_value = self.op.repeat - 1
15868 for i in range(self.op.repeat):
15869 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15870 self._TestDelay()
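# Illustrative sketch (added), using the opcode fields referenced above
# (duration, on_master, on_nodes, repeat): a five second delay on the master
# and two nodes, repeated twice, could be requested with
#   opcodes.OpTestDelay(duration=5.0, on_master=True,
#                       on_nodes=["node1.example.com", "node2.example.com"],
#                       repeat=2)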
15873 class LURestrictedCommand(NoHooksLU):
15874 """Logical unit for executing restricted commands.
15879 def ExpandNames(self):
15881 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15883 self.needed_locks = {
15884 locking.LEVEL_NODE: self.op.nodes,
15885 }
15886 self.share_locks = {
15887 locking.LEVEL_NODE: not self.op.use_locking,
15888 }
15890 def CheckPrereq(self):
15891 """Check prerequisites.
15895 def Exec(self, feedback_fn):
15896 """Execute restricted command and return output.
15899 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15901 # Check if correct locks are held
15902 assert set(self.op.nodes).issubset(owned_nodes)
15904 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15906 result = []
15908 for node_name in self.op.nodes:
15909 nres = rpcres[node_name]
15910 if nres.fail_msg:
15911 msg = ("Command '%s' on node '%s' failed: %s" %
15912 (self.op.command, node_name, nres.fail_msg))
15913 result.append((False, msg))
15914 else:
15915 result.append((True, nres.payload))
15917 return result
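# Descriptive note (added): the result is a list with one
# (success, payload_or_message) pair per requested node, in the order of
# self.op.nodes, e.g. (illustrative values):
#   [(True, "command output"), (False, "Command 'x' on node 'y' failed: ...")]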
15920 class LUTestJqueue(NoHooksLU):
15921 """Utility LU to test some aspects of the job queue.
15926 # Must be lower than default timeout for WaitForJobChange to see whether it
15927 # notices changed jobs
15928 _CLIENT_CONNECT_TIMEOUT = 20.0
15929 _CLIENT_CONFIRM_TIMEOUT = 60.0
15932 def _NotifyUsingSocket(cls, cb, errcls):
15933 """Opens a Unix socket and waits for another program to connect.
15936 @param cb: Callback to send socket name to client
15937 @type errcls: class
15938 @param errcls: Exception class to use for errors
15941 # Using a temporary directory as there's no easy way to create temporary
15942 # sockets without writing a custom loop around tempfile.mktemp and
15943 # socket.bind
15944 tmpdir = tempfile.mkdtemp()
15946 tmpsock = utils.PathJoin(tmpdir, "sock")
15948 logging.debug("Creating temporary socket at %s", tmpsock)
15949 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15954 # Send details to client
15957 # Wait for client to connect before continuing
15958 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15960 (conn, _) = sock.accept()
15961 except socket.error, err:
15962 raise errcls("Client didn't connect in time (%s)" % err)
15966 # Remove as soon as client is connected
15967 shutil.rmtree(tmpdir)
15969 # Wait for client to close
15972 # pylint: disable=E1101
15973 # Instance of '_socketobject' has no ... member
15974 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15976 except socket.error, err:
15977 raise errcls("Client failed to confirm notification (%s)" % err)
15981 def _SendNotification(self, test, arg, sockname):
15982 """Sends a notification to the client.
15985 @param test: Test name
15986 @param arg: Test argument (depends on test)
15987 @type sockname: string
15988 @param sockname: Socket path
15991 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15993 def _Notify(self, prereq, test, arg):
15994 """Notifies the client of a test.
15997 @param prereq: Whether this is a prereq-phase test
15999 @param test: Test name
16000 @param arg: Test argument (depends on test)
16004 errcls = errors.OpPrereqError
16006 errcls = errors.OpExecError
16008 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16012 def CheckArguments(self):
16013 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16014 self.expandnames_calls = 0
16016 def ExpandNames(self):
16017 checkargs_calls = getattr(self, "checkargs_calls", 0)
16018 if checkargs_calls < 1:
16019 raise errors.ProgrammerError("CheckArguments was not called")
16021 self.expandnames_calls += 1
16023 if self.op.notify_waitlock:
16024 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16026 self.LogInfo("Expanding names")
16028 # Get lock on master node (just to get a lock, not for a particular reason)
16029 self.needed_locks = {
16030 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16033 def Exec(self, feedback_fn):
16034 if self.expandnames_calls < 1:
16035 raise errors.ProgrammerError("ExpandNames was not called")
16037 if self.op.notify_exec:
16038 self._Notify(False, constants.JQT_EXEC, None)
16040 self.LogInfo("Executing")
16042 if self.op.log_messages:
16043 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16044 for idx, msg in enumerate(self.op.log_messages):
16045 self.LogInfo("Sending log message %s", idx + 1)
16046 feedback_fn(constants.JQT_MSGPREFIX + msg)
16047 # Report how many test messages have been sent
16048 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16051 raise errors.OpExecError("Opcode failure was requested")
16056 class LUTestAllocator(NoHooksLU):
16057 """Run allocator tests.
16059 This LU runs the allocator tests
16062 def CheckPrereq(self):
16063 """Check prerequisites.
16065 This checks the opcode parameters depending on the direction and mode of the test.
16068 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16069 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16070 for attr in ["memory", "disks", "disk_template",
16071 "os", "tags", "nics", "vcpus"]:
16072 if not hasattr(self.op, attr):
16073 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16074 attr, errors.ECODE_INVAL)
16075 iname = self.cfg.ExpandInstanceName(self.op.name)
16076 if iname is not None:
16077 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16078 iname, errors.ECODE_EXISTS)
16079 if not isinstance(self.op.nics, list):
16080 raise errors.OpPrereqError("Invalid parameter 'nics'",
16081 errors.ECODE_INVAL)
16082 if not isinstance(self.op.disks, list):
16083 raise errors.OpPrereqError("Invalid parameter 'disks'",
16084 errors.ECODE_INVAL)
16085 for row in self.op.disks:
16086 if (not isinstance(row, dict) or
16087 constants.IDISK_SIZE not in row or
16088 not isinstance(row[constants.IDISK_SIZE], int) or
16089 constants.IDISK_MODE not in row or
16090 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16091 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16092 " parameter", errors.ECODE_INVAL)
16093 if self.op.hypervisor is None:
16094 self.op.hypervisor = self.cfg.GetHypervisorType()
16095 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16096 fname = _ExpandInstanceName(self.cfg, self.op.name)
16097 self.op.name = fname
16098 self.relocate_from = \
16099 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16100 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16101 constants.IALLOCATOR_MODE_NODE_EVAC):
16102 if not self.op.instances:
16103 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16104 self.op.instances = _GetWantedInstances(self, self.op.instances)
16105 else:
16106 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16107 self.op.mode, errors.ECODE_INVAL)
16109 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16110 if self.op.iallocator is None:
16111 raise errors.OpPrereqError("Missing allocator name",
16112 errors.ECODE_INVAL)
16113 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16114 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16115 self.op.direction, errors.ECODE_INVAL)
16117 def Exec(self, feedback_fn):
16118 """Run the allocator test.
16121 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16122 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16123 memory=self.op.memory,
16124 disks=self.op.disks,
16125 disk_template=self.op.disk_template,
16129 vcpus=self.op.vcpus,
16130 spindle_use=self.op.spindle_use,
16131 hypervisor=self.op.hypervisor)
16132 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16133 req = iallocator.IAReqRelocate(name=self.op.name,
16134 relocate_from=list(self.relocate_from))
16135 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16136 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16137 target_groups=self.op.target_groups)
16138 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16139 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16140 evac_mode=self.op.evac_mode)
16141 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16142 disk_template = self.op.disk_template
16143 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16144 memory=self.op.memory,
16145 disks=self.op.disks,
16146 disk_template=disk_template,
16150 vcpus=self.op.vcpus,
16151 spindle_use=self.op.spindle_use,
16152 hypervisor=self.op.hypervisor)
16153 for idx in range(self.op.count)]
16154 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16155 else:
16156 raise errors.ProgrammerError("Unhandled mode %s in"
16157 " LUTestAllocator.Exec", self.op.mode)
16159 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16160 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16161 result = ial.in_text
16162 else:
16163 ial.Run(self.op.iallocator, validate=False)
16164 result = ial.out_text
16166 return result
16168 class LUNetworkAdd(LogicalUnit):
16169 """Logical unit for creating networks.
16172 HPATH = "network-add"
16173 HTYPE = constants.HTYPE_NETWORK
16176 def BuildHooksNodes(self):
16177 """Build hooks nodes.
16180 mn = self.cfg.GetMasterNode()
16181 return ([mn], [mn])
16183 def CheckArguments(self):
16184 if self.op.mac_prefix:
16185 self.op.mac_prefix = \
16186 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16188 def ExpandNames(self):
16189 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16191 if self.op.conflicts_check:
16192 self.share_locks[locking.LEVEL_NODE] = 1
16193 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16194 self.needed_locks = {
16195 locking.LEVEL_NODE: locking.ALL_SET,
16196 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16199 self.needed_locks = {}
16201 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16203 def CheckPrereq(self):
16204 if self.op.network is None:
16205 raise errors.OpPrereqError("Network must be given",
16206 errors.ECODE_INVAL)
16208 uuid = self.cfg.LookupNetwork(self.op.network_name)
16210 if uuid:
16211 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16212 self.op.network_name), errors.ECODE_EXISTS)
16214 # Check tag validity
16215 for tag in self.op.tags:
16216 objects.TaggableObject.ValidateTag(tag)
16218 def BuildHooksEnv(self):
16219 """Build hooks env.
16223 "name": self.op.network_name,
16224 "subnet": self.op.network,
16225 "gateway": self.op.gateway,
16226 "network6": self.op.network6,
16227 "gateway6": self.op.gateway6,
16228 "mac_prefix": self.op.mac_prefix,
16229 "network_type": self.op.network_type,
16230 "tags": self.op.tags,
16232 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16234 def Exec(self, feedback_fn):
16235 """Add the ip pool to the cluster.
16238 nobj = objects.Network(name=self.op.network_name,
16239 network=self.op.network,
16240 gateway=self.op.gateway,
16241 network6=self.op.network6,
16242 gateway6=self.op.gateway6,
16243 mac_prefix=self.op.mac_prefix,
16244 network_type=self.op.network_type,
16245 uuid=self.network_uuid,
16246 family=constants.IP4_VERSION)
16247 # Initialize the associated address pool
16249 pool = network.AddressPool.InitializeNetwork(nobj)
16250 except errors.AddressPoolError, e:
16251 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16253 # Check if we need to reserve the nodes and the cluster master IP
16254 # These may not be allocated to any instances in routed mode, as
16255 # they wouldn't function anyway.
16256 if self.op.conflicts_check:
16257 for node in self.cfg.GetAllNodesInfo().values():
16258 for ip in [node.primary_ip, node.secondary_ip]:
16260 if pool.Contains(ip):
16262 self.LogInfo("Reserved IP address of node '%s' (%s)",
16264 except errors.AddressPoolError:
16265 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16268 master_ip = self.cfg.GetClusterInfo().master_ip
16270 if pool.Contains(master_ip):
16271 pool.Reserve(master_ip)
16272 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16273 except errors.AddressPoolError:
16274 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16277 if self.op.add_reserved_ips:
16278 for ip in self.op.add_reserved_ips:
16280 pool.Reserve(ip, external=True)
16281 except errors.AddressPoolError, e:
16282 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16285 for tag in self.op.tags:
16286 nobj.AddTag(tag)
16288 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16289 del self.remove_locks[locking.LEVEL_NETWORK]
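# Illustrative sketch (added), using the opcode fields referenced above
# (network_name, network, gateway, add_reserved_ips, conflicts_check, tags):
# a /24 IPv4 network with a gateway and one externally reserved address could
# be added with
#   opcodes.OpNetworkAdd(network_name="net1", network="192.0.2.0/24",
#                        gateway="192.0.2.1",
#                        add_reserved_ips=["192.0.2.10"],
#                        conflicts_check=True, tags=[])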
16292 class LUNetworkRemove(LogicalUnit):
16293 HPATH = "network-remove"
16294 HTYPE = constants.HTYPE_NETWORK
16297 def ExpandNames(self):
16298 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16300 if not self.network_uuid:
16301 raise errors.OpPrereqError(("Network '%s' not found" %
16302 self.op.network_name), errors.ECODE_NOENT)
16304 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16305 self.needed_locks = {
16306 locking.LEVEL_NETWORK: [self.network_uuid],
16307 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16310 def CheckPrereq(self):
16311 """Check prerequisites.
16313 This checks that the given network name exists as a network and that it is
16314 not connected to (i.e. used by) any node group.
16318 # Verify that the network is not connected.
16319 node_groups = [group.name
16320 for group in self.cfg.GetAllNodeGroupsInfo().values()
16321 if self.network_uuid in group.networks]
16323 if node_groups:
16324 self.LogWarning("Network '%s' is connected to the following"
16325 " node groups: %s" %
16326 (self.op.network_name,
16327 utils.CommaJoin(utils.NiceSort(node_groups))))
16328 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16330 def BuildHooksEnv(self):
16331 """Build hooks env.
16335 "NETWORK_NAME": self.op.network_name,
16338 def BuildHooksNodes(self):
16339 """Build hooks nodes.
16342 mn = self.cfg.GetMasterNode()
16343 return ([mn], [mn])
16345 def Exec(self, feedback_fn):
16346 """Remove the network.
16350 self.cfg.RemoveNetwork(self.network_uuid)
16351 except errors.ConfigurationError:
16352 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16353 (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)
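
    # For each parameter, the special value constants.VALUE_NONE clears the
    # current setting, while any other value replaces it.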
    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    # extend cfg.ReserveIp/ReleaseIp with the external flag
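    # Changing the gateway swaps the external reservation: the new gateway is
    # reserved in the pool and the old one released, before the new value is
    # stored on the network object.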
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking
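
    # Translate any requested network names into UUIDs; both the locks and
    # the configuration are keyed by UUID, not by name.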
    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
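
    # Address pool statistics (free/reserved counts and the reservation map)
    # are computed on the fly from each network's address pool.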
    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
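
  # Example of the dictionary returned by _GetStats (values are illustrative
  # only):
  #   {"free_count": 250, "reserved_count": 6, "map": "XX.........X....",
  #    "external_reservations": "10.0.0.1, 10.0.0.254"}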


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
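
    # When requested, verify that no NIC of an instance in this node group
    # already uses an IP address that falls inside the network being
    # connected.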
    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
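
  # Collect, per instance, the (NIC index, IP address) pairs for which
  # check_fn reports a conflict.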
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
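
    # When requested, refuse to disconnect while instances in this node group
    # still have NICs configured on the network.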
    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
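
# Every query resource that can be accessed through an opcode
# (constants.QR_VIA_OP) must have an implementation in the map above.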
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)
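
  # No network connected to the node's group claims this address.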
  return (None, None)