code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti import network
  65 from ganeti.masterd import iallocator
  66
  67 import ganeti.masterd.instance # pylint: disable=W0611
  68
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141
 142     # Dictionaries used to declare locking needs to mcpu
 143     self.needed_locks = None
 144     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 145     self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
 146
 147     self.add_locks = {}
 148     self.remove_locks = {}
 149
 150     # Used to force good behavior when calling helper functions
 151     self.recalculate_locks = {}
 152
 153     # logging
 154     self.Log = processor.Log # pylint: disable=C0103
 155     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 156     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 157     self.LogStep = processor.LogStep # pylint: disable=C0103
 158     # support for dry-run
 159     self.dry_run_result = None
 160     # support for generic debug attribute
 161     if (not hasattr(self.op, "debug_level") or
 162         not isinstance(self.op.debug_level, int)):
 163       self.op.debug_level = 0
 164
 165     # Tasklets
 166     self.tasklets = None
 167
 168     # Validate opcode parameters and set defaults
 169     self.op.Validate(True)
 170
 171     self.CheckArguments()
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that
 205         level (note that in this case C{DeclareLocks} won't be called
 206         at all for that level)
 207       - if you need locks at a level, but you can't calculate it in
 208         this function, initialise that level with an empty list and do
 209         further processing in L{LogicalUnit.DeclareLocks} (see that
 210         function's docstring)
 211       - don't put anything for the BGL level
 212       - if you want all locks at a level use L{locking.ALL_SET} as a value
 213
 214     If you need to share locks (rather than acquire them exclusively) at one
 215     level you can modify self.share_locks, setting a true value (usually 1) for
 216     that level. By default locks are not shared.
 217
 218     This function can also define a list of tasklets, which then will be
 219     executed in order instead of the usual LU-level CheckPrereq and Exec
 220     functions, if those are not defined by the LU.
 221
 222     Examples::
 223
 224       # Acquire all nodes and one instance
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: locking.ALL_SET,
 227         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 228       }
 229       # Acquire just two nodes
 230       self.needed_locks = {
 231         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 232       }
 233       # Acquire no locks
 234       self.needed_locks = {} # No, you can't leave it to the default value None
 235
 236     """
 237     # The implementation of this method is mandatory only if the new LU is
 238     # concurrent, so that old LUs don't need to be changed all at the same
 239     # time.
 240     if self.REQ_BGL:
 241       self.needed_locks = {} # Exclusive LUs don't need locks.
 242     else:
 243       raise NotImplementedError
 244
 245   def DeclareLocks(self, level):
 246     """Declare LU locking needs for a level
 247
 248     While most LUs can just declare their locking needs at ExpandNames time,
 249     sometimes there's the need to calculate some locks after having acquired
 250     the ones before. This function is called just before acquiring locks at a
 251     particular level, but after acquiring the ones at lower levels, and permits
 252     such calculations. It can be used to modify self.needed_locks, and by
 253     default it does nothing.
 254
 255     This function is only called if you have something already set in
 256     self.needed_locks for the level.
 257
 258     @param level: Locking level which is going to be locked
 259     @type level: member of L{ganeti.locking.LEVELS}
 260
 261     """
 262
 263   def CheckPrereq(self):
 264     """Check prerequisites for this LU.
 265
 266     This method should check that the prerequisites for the execution
 267     of this LU are fulfilled. It can do internode communication, but
 268     it should be idempotent - no cluster or system changes are
 269     allowed.
 270
 271     The method should raise errors.OpPrereqError in case something is
 272     not fulfilled. Its return value is ignored.
 273
 274     This method should also update all the parameters of the opcode to
 275     their canonical form if it hasn't been done by ExpandNames before.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Checking prerequisites for tasklet %s/%s",
 281                       idx + 1, len(self.tasklets))
 282         tl.CheckPrereq()
 283     else:
 284       pass
 285
 286   def Exec(self, feedback_fn):
 287     """Execute the LU.
 288
 289     This method should implement the actual work. It should raise
 290     errors.OpExecError for failures that are somewhat dealt with in
 291     code, or expected.
 292
 293     """
 294     if self.tasklets is not None:
 295       for (idx, tl) in enumerate(self.tasklets):
 296         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 297         tl.Exec(feedback_fn)
 298     else:
 299       raise NotImplementedError
 300
 301   def BuildHooksEnv(self):
 302     """Build hooks environment for this LU.
 303
 304     @rtype: dict
 305     @return: Dictionary containing the environment that will be used for
 306       running the hooks for this LU. The keys of the dict must not be prefixed
 307       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 308       will extend the environment with additional variables. If no environment
 309       should be defined, an empty dictionary should be returned (not C{None}).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def BuildHooksNodes(self):
 317     """Build list of nodes to run LU's hooks.
 318
 319     @rtype: tuple; (list, list)
 320     @return: Tuple containing a list of node names on which the hook
 321       should run before the execution and a list of node names on which the
 322       hook should run after the execution. No nodes should be returned as an
 323       empty list (and not None).
 324     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 325       will not be called.
 326
 327     """
 328     raise NotImplementedError
 329
 330   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 331     """Notify the LU about the results of its hooks.
 332
 333     This method is called every time a hooks phase is executed, and notifies
 334     the Logical Unit about the hooks' result. The LU can then use it to alter
 335     its result based on the hooks.  By default the method does nothing and the
 336     previous result is passed back unchanged but any LU can define it if it
 337     wants to use the local cluster hook-scripts somehow.
 338
 339     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 340         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 341     @param hook_results: the results of the multi-node hooks rpc call
 342     @param feedback_fn: function used send feedback back to the caller
 343     @param lu_result: the previous Exec result this LU had, or None
 344         in the PRE phase
 345     @return: the new Exec result, based on the previous result
 346         and hook results
 347
 348     """
 349     # API must be kept, thus we ignore the unused argument and could
 350     # be a function warnings
 351     # pylint: disable=W0613,R0201
 352     return lu_result
 353
 354   def _ExpandAndLockInstance(self):
 355     """Helper function to expand and lock an instance.
 356
 357     Many LUs that work on an instance take its name in self.op.instance_name
 358     and need to expand it and then declare the expanded name for locking. This
 359     function does it, and then updates self.op.instance_name to the expanded
 360     name. It also initializes needed_locks as a dict, if this hasn't been done
 361     before.
 362
 363     """
 364     if self.needed_locks is None:
 365       self.needed_locks = {}
 366     else:
 367       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 368         "_ExpandAndLockInstance called with instance-level locks set"
 369     self.op.instance_name = _ExpandInstanceName(self.cfg,
 370                                                 self.op.instance_name)
 371     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 372
 373   def _LockInstancesNodes(self, primary_only=False,
 374                           level=locking.LEVEL_NODE):
 375     """Helper function to declare instances' nodes for locking.
 376
 377     This function should be called after locking one or more instances to lock
 378     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 379     with all primary or secondary nodes for instances already locked and
 380     present in self.needed_locks[locking.LEVEL_INSTANCE].
 381
 382     It should be called from DeclareLocks, and for safety only works if
 383     self.recalculate_locks[locking.LEVEL_NODE] is set.
 384
 385     In the future it may grow parameters to just lock some instance's nodes, or
 386     to just lock primaries or secondary nodes, if needed.
 387
 388     If should be called in DeclareLocks in a way similar to::
 389
 390       if level == locking.LEVEL_NODE:
 391         self._LockInstancesNodes()
 392
 393     @type primary_only: boolean
 394     @param primary_only: only lock primary nodes of locked instances
 395     @param level: Which lock level to use for locking nodes
 396
 397     """
 398     assert level in self.recalculate_locks, \
 399       "_LockInstancesNodes helper function called with no nodes to recalculate"
 400
 401     # TODO: check if we're really been called with the instance locks held
 402
 403     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 404     # future we might want to have different behaviors depending on the value
 405     # of self.recalculate_locks[locking.LEVEL_NODE]
 406     wanted_nodes = []
 407     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 408     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 409       wanted_nodes.append(instance.primary_node)
 410       if not primary_only:
 411         wanted_nodes.extend(instance.secondary_nodes)
 412
 413     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 414       self.needed_locks[level] = wanted_nodes
 415     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 416       self.needed_locks[level].extend(wanted_nodes)
 417     else:
 418       raise errors.ProgrammerError("Unknown recalculation mode")
 419
 420     del self.recalculate_locks[level]
 421
 422
 423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 424   """Simple LU which runs no hooks.
 425
 426   This LU is intended as a parent for other LogicalUnits which will
 427   run no hooks, in order to reduce duplicate code.
 428
 429   """
 430   HPATH = None
 431   HTYPE = None
 432
 433   def BuildHooksEnv(self):
 434     """Empty BuildHooksEnv for NoHooksLu.
 435
 436     This just raises an error.
 437
 438     """
 439     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 440
 441   def BuildHooksNodes(self):
 442     """Empty BuildHooksNodes for NoHooksLU.
 443
 444     """
 445     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 446
 447
 448 class Tasklet:
 449   """Tasklet base class.
 450
 451   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 452   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 453   tasklets know nothing about locks.
 454
 455   Subclasses must follow these rules:
 456     - Implement CheckPrereq
 457     - Implement Exec
 458
 459   """
 460   def __init__(self, lu):
 461     self.lu = lu
 462
 463     # Shortcuts
 464     self.cfg = lu.cfg
 465     self.rpc = lu.rpc
 466
 467   def CheckPrereq(self):
 468     """Check prerequisites for this tasklets.
 469
 470     This method should check whether the prerequisites for the execution of
 471     this tasklet are fulfilled. It can do internode communication, but it
 472     should be idempotent - no cluster or system changes are allowed.
 473
 474     The method should raise errors.OpPrereqError in case something is not
 475     fulfilled. Its return value is ignored.
 476
 477     This method should also update all parameters to their canonical form if it
 478     hasn't been done before.
 479
 480     """
 481     pass
 482
 483   def Exec(self, feedback_fn):
 484     """Execute the tasklet.
 485
 486     This method should implement the actual work. It should raise
 487     errors.OpExecError for failures that are somewhat dealt with in code, or
 488     expected.
 489
 490     """
 491     raise NotImplementedError
 492
 493
 494 class _QueryBase:
 495   """Base for query utility classes.
 496
 497   """
 498   #: Attribute holding field definitions
 499   FIELDS = None
 500
 501   #: Field to sort by
 502   SORT_FIELD = "name"
 503
 504   def __init__(self, qfilter, fields, use_locking):
 505     """Initializes this class.
 506
 507     """
 508     self.use_locking = use_locking
 509
 510     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 511                              namefield=self.SORT_FIELD)
 512     self.requested_data = self.query.RequestedData()
 513     self.names = self.query.RequestedNames()
 514
 515     # Sort only if no names were requested
 516     self.sort_by_name = not self.names
 517
 518     self.do_locking = None
 519     self.wanted = None
 520
 521   def _GetNames(self, lu, all_names, lock_level):
 522     """Helper function to determine names asked for in the query.
 523
 524     """
 525     if self.do_locking:
 526       names = lu.owned_locks(lock_level)
 527     else:
 528       names = all_names
 529
 530     if self.wanted == locking.ALL_SET:
 531       assert not self.names
 532       # caller didn't specify names, so ordering is not important
 533       return utils.NiceSort(names)
 534
 535     # caller specified names and we must keep the same order
 536     assert self.names
 537     assert not self.do_locking or lu.glm.is_owned(lock_level)
 538
 539     missing = set(self.wanted).difference(names)
 540     if missing:
 541       raise errors.OpExecError("Some items were removed before retrieving"
 542                                " their data: %s" % missing)
 543
 544     # Return expanded names
 545     return self.wanted
 546
 547   def ExpandNames(self, lu):
 548     """Expand names for this query.
 549
 550     See L{LogicalUnit.ExpandNames}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def DeclareLocks(self, lu, level):
 556     """Declare locks for this query.
 557
 558     See L{LogicalUnit.DeclareLocks}.
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def _GetQueryData(self, lu):
 564     """Collects all data for this query.
 565
 566     @return: Query data object
 567
 568     """
 569     raise NotImplementedError()
 570
 571   def NewStyleQuery(self, lu):
 572     """Collect data and execute query.
 573
 574     """
 575     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 576                                   sort_by_name=self.sort_by_name)
 577
 578   def OldStyleQuery(self, lu):
 579     """Collect data and execute query.
 580
 581     """
 582     return self.query.OldStyleQuery(self._GetQueryData(lu),
 583                                     sort_by_name=self.sort_by_name)
 584
 585
 586 def _ShareAll():
 587   """Returns a dict declaring all lock levels shared.
 588
 589   """
 590   return dict.fromkeys(locking.LEVELS, 1)
 591
 592
 593 def _AnnotateDiskParams(instance, devs, cfg):
 594   """Little helper wrapper to the rpc annotation method.
 595
 596   @param instance: The instance object
 597   @type devs: List of L{objects.Disk}
 598   @param devs: The root devices (not any of its children!)
 599   @param cfg: The config object
 600   @returns The annotated disk copies
 601   @see L{rpc.AnnotateDiskParams}
 602
 603   """
 604   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 605                                 cfg.GetInstanceDiskParams(instance))
 606
 607
 608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 609                               cur_group_uuid):
 610   """Checks if node groups for locked instances are still correct.
 611
 612   @type cfg: L{config.ConfigWriter}
 613   @param cfg: Cluster configuration
 614   @type instances: dict; string as key, L{objects.Instance} as value
 615   @param instances: Dictionary, instance name as key, instance object as value
 616   @type owned_groups: iterable of string
 617   @param owned_groups: List of owned groups
 618   @type owned_nodes: iterable of string
 619   @param owned_nodes: List of owned nodes
 620   @type cur_group_uuid: string or None
 621   @param cur_group_uuid: Optional group UUID to check against instance's groups
 622
 623   """
 624   for (name, inst) in instances.items():
 625     assert owned_nodes.issuperset(inst.all_nodes), \
 626       "Instance %s's nodes changed while we kept the lock" % name
 627
 628     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 629
 630     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 631       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 632
 633
 634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 635                              primary_only=False):
 636   """Checks if the owned node groups are still correct for an instance.
 637
 638   @type cfg: L{config.ConfigWriter}
 639   @param cfg: The cluster configuration
 640   @type instance_name: string
 641   @param instance_name: Instance name
 642   @type owned_groups: set or frozenset
 643   @param owned_groups: List of currently owned node groups
 644   @type primary_only: boolean
 645   @param primary_only: Whether to check node groups for only the primary node
 646
 647   """
 648   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 649
 650   if not owned_groups.issuperset(inst_groups):
 651     raise errors.OpPrereqError("Instance %s's node groups changed since"
 652                                " locks were acquired, current groups are"
 653                                " are '%s', owning groups '%s'; retry the"
 654                                " operation" %
 655                                (instance_name,
 656                                 utils.CommaJoin(inst_groups),
 657                                 utils.CommaJoin(owned_groups)),
 658                                errors.ECODE_STATE)
 659
 660   return inst_groups
 661
 662
 663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 664   """Checks if the instances in a node group are still correct.
 665
 666   @type cfg: L{config.ConfigWriter}
 667   @param cfg: The cluster configuration
 668   @type group_uuid: string
 669   @param group_uuid: Node group UUID
 670   @type owned_instances: set or frozenset
 671   @param owned_instances: List of currently owned instances
 672
 673   """
 674   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 675   if owned_instances != wanted_instances:
 676     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 677                                " locks were acquired, wanted '%s', have '%s';"
 678                                " retry the operation" %
 679                                (group_uuid,
 680                                 utils.CommaJoin(wanted_instances),
 681                                 utils.CommaJoin(owned_instances)),
 682                                errors.ECODE_STATE)
 683
 684   return wanted_instances
 685
 686
 687 def _SupportsOob(cfg, node):
 688   """Tells if node supports OOB.
 689
 690   @type cfg: L{config.ConfigWriter}
 691   @param cfg: The cluster configuration
 692   @type node: L{objects.Node}
 693   @param node: The node
 694   @return: The OOB script if supported or an empty string otherwise
 695
 696   """
 697   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 698
 699
 700 def _IsExclusiveStorageEnabledNode(cfg, node):
 701   """Whether exclusive_storage is in effect for the given node.
 702
 703   @type cfg: L{config.ConfigWriter}
 704   @param cfg: The cluster configuration
 705   @type node: L{objects.Node}
 706   @param node: The node
 707   @rtype: bool
 708   @return: The effective value of exclusive_storage
 709
 710   """
 711   return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
 712
 713
 714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
 715   """Whether exclusive_storage is in effect for the given node.
 716
 717   @type cfg: L{config.ConfigWriter}
 718   @param cfg: The cluster configuration
 719   @type nodename: string
 720   @param nodename: The node
 721   @rtype: bool
 722   @return: The effective value of exclusive_storage
 723   @raise errors.OpPrereqError: if no node exists with the given name
 724
 725   """
 726   ni = cfg.GetNodeInfo(nodename)
 727   if ni is None:
 728     raise errors.OpPrereqError("Invalid node name %s" % nodename,
 729                                errors.ECODE_NOENT)
 730   return _IsExclusiveStorageEnabledNode(cfg, ni)
 731
 732
 733 def _CopyLockList(names):
 734   """Makes a copy of a list of lock names.
 735
 736   Handles L{locking.ALL_SET} correctly.
 737
 738   """
 739   if names == locking.ALL_SET:
 740     return locking.ALL_SET
 741   else:
 742     return names[:]
 743
 744
 745 def _GetWantedNodes(lu, nodes):
 746   """Returns list of checked and expanded node names.
 747
 748   @type lu: L{LogicalUnit}
 749   @param lu: the logical unit on whose behalf we execute
 750   @type nodes: list
 751   @param nodes: list of node names or None for all nodes
 752   @rtype: list
 753   @return: the list of nodes, sorted
 754   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 755
 756   """
 757   if nodes:
 758     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 759
 760   return utils.NiceSort(lu.cfg.GetNodeList())
 761
 762
 763 def _GetWantedInstances(lu, instances):
 764   """Returns list of checked and expanded instance names.
 765
 766   @type lu: L{LogicalUnit}
 767   @param lu: the logical unit on whose behalf we execute
 768   @type instances: list
 769   @param instances: list of instance names or None for all instances
 770   @rtype: list
 771   @return: the list of instances, sorted
 772   @raise errors.OpPrereqError: if the instances parameter is wrong type
 773   @raise errors.OpPrereqError: if any of the passed instances is not found
 774
 775   """
 776   if instances:
 777     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 778   else:
 779     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 780   return wanted
 781
 782
 783 def _GetUpdatedParams(old_params, update_dict,
 784                       use_default=True, use_none=False):
 785   """Return the new version of a parameter dictionary.
 786
 787   @type old_params: dict
 788   @param old_params: old parameters
 789   @type update_dict: dict
 790   @param update_dict: dict containing new parameter values, or
 791       constants.VALUE_DEFAULT to reset the parameter to its default
 792       value
 793   @param use_default: boolean
 794   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 795       values as 'to be deleted' values
 796   @param use_none: boolean
 797   @type use_none: whether to recognise C{None} values as 'to be
 798       deleted' values
 799   @rtype: dict
 800   @return: the new parameter dictionary
 801
 802   """
 803   params_copy = copy.deepcopy(old_params)
 804   for key, val in update_dict.iteritems():
 805     if ((use_default and val == constants.VALUE_DEFAULT) or
 806         (use_none and val is None)):
 807       try:
 808         del params_copy[key]
 809       except KeyError:
 810         pass
 811     else:
 812       params_copy[key] = val
 813   return params_copy
 814
 815
 816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 817   """Return the new version of a instance policy.
 818
 819   @param group_policy: whether this policy applies to a group and thus
 820     we should support removal of policy entries
 821
 822   """
 823   use_none = use_default = group_policy
 824   ipolicy = copy.deepcopy(old_ipolicy)
 825   for key, value in new_ipolicy.items():
 826     if key not in constants.IPOLICY_ALL_KEYS:
 827       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 828                                  errors.ECODE_INVAL)
 829     if key in constants.IPOLICY_ISPECS:
 830       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 831       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 832                                        use_none=use_none,
 833                                        use_default=use_default)
 834     else:
 835       if (not value or value == [constants.VALUE_DEFAULT] or
 836           value == constants.VALUE_DEFAULT):
 837         if group_policy:
 838           del ipolicy[key]
 839         else:
 840           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 841                                      " on the cluster'" % key,
 842                                      errors.ECODE_INVAL)
 843       else:
 844         if key in constants.IPOLICY_PARAMETERS:
 845           # FIXME: we assume all such values are float
 846           try:
 847             ipolicy[key] = float(value)
 848           except (TypeError, ValueError), err:
 849             raise errors.OpPrereqError("Invalid value for attribute"
 850                                        " '%s': '%s', error: %s" %
 851                                        (key, value, err), errors.ECODE_INVAL)
 852         else:
 853           # FIXME: we assume all others are lists; this should be redone
 854           # in a nicer way
 855           ipolicy[key] = list(value)
 856   try:
 857     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 858   except errors.ConfigurationError, err:
 859     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 860                                errors.ECODE_INVAL)
 861   return ipolicy
 862
 863
 864 def _UpdateAndVerifySubDict(base, updates, type_check):
 865   """Updates and verifies a dict with sub dicts of the same type.
 866
 867   @param base: The dict with the old data
 868   @param updates: The dict with the new data
 869   @param type_check: Dict suitable to ForceDictType to verify correct types
 870   @returns: A new dict with updated and verified values
 871
 872   """
 873   def fn(old, value):
 874     new = _GetUpdatedParams(old, value)
 875     utils.ForceDictType(new, type_check)
 876     return new
 877
 878   ret = copy.deepcopy(base)
 879   ret.update(dict((key, fn(base.get(key, {}), value))
 880                   for key, value in updates.items()))
 881   return ret
 882
 883
 884 def _MergeAndVerifyHvState(op_input, obj_input):
 885   """Combines the hv state from an opcode with the one of the object
 886
 887   @param op_input: The input dict from the opcode
 888   @param obj_input: The input dict from the objects
 889   @return: The verified and updated dict
 890
 891   """
 892   if op_input:
 893     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 894     if invalid_hvs:
 895       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 896                                  " %s" % utils.CommaJoin(invalid_hvs),
 897                                  errors.ECODE_INVAL)
 898     if obj_input is None:
 899       obj_input = {}
 900     type_check = constants.HVSTS_PARAMETER_TYPES
 901     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 902
 903   return None
 904
 905
 906 def _MergeAndVerifyDiskState(op_input, obj_input):
 907   """Combines the disk state from an opcode with the one of the object
 908
 909   @param op_input: The input dict from the opcode
 910   @param obj_input: The input dict from the objects
 911   @return: The verified and updated dict
 912   """
 913   if op_input:
 914     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 915     if invalid_dst:
 916       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 917                                  utils.CommaJoin(invalid_dst),
 918                                  errors.ECODE_INVAL)
 919     type_check = constants.DSS_PARAMETER_TYPES
 920     if obj_input is None:
 921       obj_input = {}
 922     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 923                                               type_check))
 924                 for key, value in op_input.items())
 925
 926   return None
 927
 928
 929 def _ReleaseLocks(lu, level, names=None, keep=None):
 930   """Releases locks owned by an LU.
 931
 932   @type lu: L{LogicalUnit}
 933   @param level: Lock level
 934   @type names: list or None
 935   @param names: Names of locks to release
 936   @type keep: list or None
 937   @param keep: Names of locks to retain
 938
 939   """
 940   assert not (keep is not None and names is not None), \
 941          "Only one of the 'names' and the 'keep' parameters can be given"
 942
 943   if names is not None:
 944     should_release = names.__contains__
 945   elif keep:
 946     should_release = lambda name: name not in keep
 947   else:
 948     should_release = None
 949
 950   owned = lu.owned_locks(level)
 951   if not owned:
 952     # Not owning any lock at this level, do nothing
 953     pass
 954
 955   elif should_release:
 956     retain = []
 957     release = []
 958
 959     # Determine which locks to release
 960     for name in owned:
 961       if should_release(name):
 962         release.append(name)
 963       else:
 964         retain.append(name)
 965
 966     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 967
 968     # Release just some locks
 969     lu.glm.release(level, names=release)
 970
 971     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 972   else:
 973     # Release everything
 974     lu.glm.release(level)
 975
 976     assert not lu.glm.is_owned(level), "No locks should be owned"
 977
 978
 979 def _MapInstanceDisksToNodes(instances):
 980   """Creates a map from (node, volume) to instance name.
 981
 982   @type instances: list of L{objects.Instance}
 983   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 984
 985   """
 986   return dict(((node, vol), inst.name)
 987               for inst in instances
 988               for (node, vols) in inst.MapLVsByNode().items()
 989               for vol in vols)
 990
 991
 992 def _RunPostHook(lu, node_name):
 993   """Runs the post-hook for an opcode on a single node.
 994
 995   """
 996   hm = lu.proc.BuildHooksManager(lu)
 997   try:
 998     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 999   except Exception, err: # pylint: disable=W0703
1000     lu.LogWarning("Errors occurred running hooks on %s: %s",
1001                   node_name, err)
1002
1003
1004 def _CheckOutputFields(static, dynamic, selected):
1005   """Checks whether all selected fields are valid.
1006
1007   @type static: L{utils.FieldSet}
1008   @param static: static fields set
1009   @type dynamic: L{utils.FieldSet}
1010   @param dynamic: dynamic fields set
1011
1012   """
1013   f = utils.FieldSet()
1014   f.Extend(static)
1015   f.Extend(dynamic)
1016
1017   delta = f.NonMatching(selected)
1018   if delta:
1019     raise errors.OpPrereqError("Unknown output fields selected: %s"
1020                                % ",".join(delta), errors.ECODE_INVAL)
1021
1022
1023 def _CheckGlobalHvParams(params):
1024   """Validates that given hypervisor params are not global ones.
1025
1026   This will ensure that instances don't get customised versions of
1027   global params.
1028
1029   """
1030   used_globals = constants.HVC_GLOBALS.intersection(params)
1031   if used_globals:
1032     msg = ("The following hypervisor parameters are global and cannot"
1033            " be customized at instance level, please modify them at"
1034            " cluster level: %s" % utils.CommaJoin(used_globals))
1035     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1036
1037
1038 def _CheckNodeOnline(lu, node, msg=None):
1039   """Ensure that a given node is online.
1040
1041   @param lu: the LU on behalf of which we make the check
1042   @param node: the node to check
1043   @param msg: if passed, should be a message to replace the default one
1044   @raise errors.OpPrereqError: if the node is offline
1045
1046   """
1047   if msg is None:
1048     msg = "Can't use offline node"
1049   if lu.cfg.GetNodeInfo(node).offline:
1050     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1051
1052
1053 def _CheckNodeNotDrained(lu, node):
1054   """Ensure that a given node is not drained.
1055
1056   @param lu: the LU on behalf of which we make the check
1057   @param node: the node to check
1058   @raise errors.OpPrereqError: if the node is drained
1059
1060   """
1061   if lu.cfg.GetNodeInfo(node).drained:
1062     raise errors.OpPrereqError("Can't use drained node %s" % node,
1063                                errors.ECODE_STATE)
1064
1065
1066 def _CheckNodeVmCapable(lu, node):
1067   """Ensure that a given node is vm capable.
1068
1069   @param lu: the LU on behalf of which we make the check
1070   @param node: the node to check
1071   @raise errors.OpPrereqError: if the node is not vm capable
1072
1073   """
1074   if not lu.cfg.GetNodeInfo(node).vm_capable:
1075     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1076                                errors.ECODE_STATE)
1077
1078
1079 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1080   """Ensure that a node supports a given OS.
1081
1082   @param lu: the LU on behalf of which we make the check
1083   @param node: the node to check
1084   @param os_name: the OS to query about
1085   @param force_variant: whether to ignore variant errors
1086   @raise errors.OpPrereqError: if the node is not supporting the OS
1087
1088   """
1089   result = lu.rpc.call_os_get(node, os_name)
1090   result.Raise("OS '%s' not in supported OS list for node %s" %
1091                (os_name, node),
1092                prereq=True, ecode=errors.ECODE_INVAL)
1093   if not force_variant:
1094     _CheckOSVariant(result.payload, os_name)
1095
1096
1097 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1098   """Ensure that a node has the given secondary ip.
1099
1100   @type lu: L{LogicalUnit}
1101   @param lu: the LU on behalf of which we make the check
1102   @type node: string
1103   @param node: the node to check
1104   @type secondary_ip: string
1105   @param secondary_ip: the ip to check
1106   @type prereq: boolean
1107   @param prereq: whether to throw a prerequisite or an execute error
1108   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1109   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1110
1111   """
1112   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1113   result.Raise("Failure checking secondary ip on node %s" % node,
1114                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115   if not result.payload:
1116     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1117            " please fix and re-run this command" % secondary_ip)
1118     if prereq:
1119       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1120     else:
1121       raise errors.OpExecError(msg)
1122
1123
1124 def _CheckNodePVs(nresult, exclusive_storage):
1125   """Check node PVs.
1126
1127   """
1128   pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1129   if pvlist_dict is None:
1130     return (["Can't get PV list from node"], None)
1131   pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1132   errlist = []
1133   # check that ':' is not present in PV names, since it's a
1134   # special character for lvcreate (denotes the range of PEs to
1135   # use on the PV)
1136   for pv in pvlist:
1137     if ":" in pv.name:
1138       errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1139                      (pv.name, pv.vg_name))
1140   es_pvinfo = None
1141   if exclusive_storage:
1142     (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1143     errlist.extend(errmsgs)
1144     shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1145     if shared_pvs:
1146       for (pvname, lvlist) in shared_pvs:
1147         # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1148         errlist.append("PV %s is shared among unrelated LVs (%s)" %
1149                        (pvname, utils.CommaJoin(lvlist)))
1150   return (errlist, es_pvinfo)
1151
1152
1153 def _GetClusterDomainSecret():
1154   """Reads the cluster domain secret.
1155
1156   """
1157   return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1158                                strict=True)
1159
1160
1161 def _CheckInstanceState(lu, instance, req_states, msg=None):
1162   """Ensure that an instance is in one of the required states.
1163
1164   @param lu: the LU on behalf of which we make the check
1165   @param instance: the instance to check
1166   @param msg: if passed, should be a message to replace the default one
1167   @raise errors.OpPrereqError: if the instance is not in the required state
1168
1169   """
1170   if msg is None:
1171     msg = ("can't use instance from outside %s states" %
1172            utils.CommaJoin(req_states))
1173   if instance.admin_state not in req_states:
1174     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1175                                (instance.name, instance.admin_state, msg),
1176                                errors.ECODE_STATE)
1177
1178   if constants.ADMINST_UP not in req_states:
1179     pnode = instance.primary_node
1180     if not lu.cfg.GetNodeInfo(pnode).offline:
1181       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1182       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1183                   prereq=True, ecode=errors.ECODE_ENVIRON)
1184       if instance.name in ins_l.payload:
1185         raise errors.OpPrereqError("Instance %s is running, %s" %
1186                                    (instance.name, msg), errors.ECODE_STATE)
1187     else:
1188       lu.LogWarning("Primary node offline, ignoring check that instance"
1189                      " is down")
1190
1191
1192 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1193   """Computes if value is in the desired range.
1194
1195   @param name: name of the parameter for which we perform the check
1196   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1197       not just 'disk')
1198   @param ipolicy: dictionary containing min, max and std values
1199   @param value: actual value that we want to use
1200   @return: None or element not meeting the criteria
1201
1202
1203   """
1204   if value in [None, constants.VALUE_AUTO]:
1205     return None
1206   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1207   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1208   if value > max_v or min_v > value:
1209     if qualifier:
1210       fqn = "%s/%s" % (name, qualifier)
1211     else:
1212       fqn = name
1213     return ("%s value %s is not in range [%s, %s]" %
1214             (fqn, value, min_v, max_v))
1215   return None
1216
1217
1218 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1219                                  nic_count, disk_sizes, spindle_use,
1220                                  _compute_fn=_ComputeMinMaxSpec):
1221   """Verifies ipolicy against provided specs.
1222
1223   @type ipolicy: dict
1224   @param ipolicy: The ipolicy
1225   @type mem_size: int
1226   @param mem_size: The memory size
1227   @type cpu_count: int
1228   @param cpu_count: Used cpu cores
1229   @type disk_count: int
1230   @param disk_count: Number of disks used
1231   @type nic_count: int
1232   @param nic_count: Number of nics used
1233   @type disk_sizes: list of ints
1234   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1235   @type spindle_use: int
1236   @param spindle_use: The number of spindles this instance uses
1237   @param _compute_fn: The compute function (unittest only)
1238   @return: A list of violations, or an empty list of no violations are found
1239
1240   """
1241   assert disk_count == len(disk_sizes)
1242
1243   test_settings = [
1244     (constants.ISPEC_MEM_SIZE, "", mem_size),
1245     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1246     (constants.ISPEC_DISK_COUNT, "", disk_count),
1247     (constants.ISPEC_NIC_COUNT, "", nic_count),
1248     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1249     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1250          for idx, d in enumerate(disk_sizes)]
1251
1252   return filter(None,
1253                 (_compute_fn(name, qualifier, ipolicy, value)
1254                  for (name, qualifier, value) in test_settings))
1255
1256
1257 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1258                                      _compute_fn=_ComputeIPolicySpecViolation):
1259   """Compute if instance meets the specs of ipolicy.
1260
1261   @type ipolicy: dict
1262   @param ipolicy: The ipolicy to verify against
1263   @type instance: L{objects.Instance}
1264   @param instance: The instance to verify
1265   @param _compute_fn: The function to verify ipolicy (unittest only)
1266   @see: L{_ComputeIPolicySpecViolation}
1267
1268   """
1269   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1270   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1271   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1272   disk_count = len(instance.disks)
1273   disk_sizes = [disk.size for disk in instance.disks]
1274   nic_count = len(instance.nics)
1275
1276   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1277                      disk_sizes, spindle_use)
1278
1279
1280 def _ComputeIPolicyInstanceSpecViolation(
1281   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1282   """Compute if instance specs meets the specs of ipolicy.
1283
1284   @type ipolicy: dict
1285   @param ipolicy: The ipolicy to verify against
1286   @param instance_spec: dict
1287   @param instance_spec: The instance spec to verify
1288   @param _compute_fn: The function to verify ipolicy (unittest only)
1289   @see: L{_ComputeIPolicySpecViolation}
1290
1291   """
1292   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1293   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1294   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1295   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1296   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1297   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1298
1299   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1300                      disk_sizes, spindle_use)
1301
1302
1303 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1304                                  target_group,
1305                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1306   """Compute if instance meets the specs of the new target group.
1307
1308   @param ipolicy: The ipolicy to verify
1309   @param instance: The instance object to verify
1310   @param current_group: The current group of the instance
1311   @param target_group: The new group of the instance
1312   @param _compute_fn: The function to verify ipolicy (unittest only)
1313   @see: L{_ComputeIPolicySpecViolation}
1314
1315   """
1316   if current_group == target_group:
1317     return []
1318   else:
1319     return _compute_fn(ipolicy, instance)
1320
1321
1322 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1323                             _compute_fn=_ComputeIPolicyNodeViolation):
1324   """Checks that the target node is correct in terms of instance policy.
1325
1326   @param ipolicy: The ipolicy to verify
1327   @param instance: The instance object to verify
1328   @param node: The new node to relocate
1329   @param ignore: Ignore violations of the ipolicy
1330   @param _compute_fn: The function to verify ipolicy (unittest only)
1331   @see: L{_ComputeIPolicySpecViolation}
1332
1333   """
1334   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1335   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1336
1337   if res:
1338     msg = ("Instance does not meet target node group's (%s) instance"
1339            " policy: %s") % (node.group, utils.CommaJoin(res))
1340     if ignore:
1341       lu.LogWarning(msg)
1342     else:
1343       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1344
1345
1346 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1347   """Computes a set of any instances that would violate the new ipolicy.
1348
1349   @param old_ipolicy: The current (still in-place) ipolicy
1350   @param new_ipolicy: The new (to become) ipolicy
1351   @param instances: List of instances to verify
1352   @return: A list of instances which violates the new ipolicy but
1353       did not before
1354
1355   """
1356   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1357           _ComputeViolatingInstances(old_ipolicy, instances))
1358
1359
1360 def _ExpandItemName(fn, name, kind):
1361   """Expand an item name.
1362
1363   @param fn: the function to use for expansion
1364   @param name: requested item name
1365   @param kind: text description ('Node' or 'Instance')
1366   @return: the resolved (full) name
1367   @raise errors.OpPrereqError: if the item is not found
1368
1369   """
1370   full_name = fn(name)
1371   if full_name is None:
1372     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1373                                errors.ECODE_NOENT)
1374   return full_name
1375
1376
1377 def _ExpandNodeName(cfg, name):
1378   """Wrapper over L{_ExpandItemName} for nodes."""
1379   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1380
1381
1382 def _ExpandInstanceName(cfg, name):
1383   """Wrapper over L{_ExpandItemName} for instance."""
1384   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1385
1386
1387 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1388                          network_type, mac_prefix, tags):
1389   """Builds network related env variables for hooks
1390
1391   This builds the hook environment from individual variables.
1392
1393   @type name: string
1394   @param name: the name of the network
1395   @type subnet: string
1396   @param subnet: the ipv4 subnet
1397   @type gateway: string
1398   @param gateway: the ipv4 gateway
1399   @type network6: string
1400   @param network6: the ipv6 subnet
1401   @type gateway6: string
1402   @param gateway6: the ipv6 gateway
1403   @type network_type: string
1404   @param network_type: the type of the network
1405   @type mac_prefix: string
1406   @param mac_prefix: the mac_prefix
1407   @type tags: list
1408   @param tags: the tags of the network
1409
1410   """
1411   env = {}
1412   if name:
1413     env["NETWORK_NAME"] = name
1414   if subnet:
1415     env["NETWORK_SUBNET"] = subnet
1416   if gateway:
1417     env["NETWORK_GATEWAY"] = gateway
1418   if network6:
1419     env["NETWORK_SUBNET6"] = network6
1420   if gateway6:
1421     env["NETWORK_GATEWAY6"] = gateway6
1422   if mac_prefix:
1423     env["NETWORK_MAC_PREFIX"] = mac_prefix
1424   if network_type:
1425     env["NETWORK_TYPE"] = network_type
1426   if tags:
1427     env["NETWORK_TAGS"] = " ".join(tags)
1428
1429   return env
1430
1431
1432 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1433                           minmem, maxmem, vcpus, nics, disk_template, disks,
1434                           bep, hvp, hypervisor_name, tags):
1435   """Builds instance related env variables for hooks
1436
1437   This builds the hook environment from individual variables.
1438
1439   @type name: string
1440   @param name: the name of the instance
1441   @type primary_node: string
1442   @param primary_node: the name of the instance's primary node
1443   @type secondary_nodes: list
1444   @param secondary_nodes: list of secondary nodes as strings
1445   @type os_type: string
1446   @param os_type: the name of the instance's OS
1447   @type status: string
1448   @param status: the desired status of the instance
1449   @type minmem: string
1450   @param minmem: the minimum memory size of the instance
1451   @type maxmem: string
1452   @param maxmem: the maximum memory size of the instance
1453   @type vcpus: string
1454   @param vcpus: the count of VCPUs the instance has
1455   @type nics: list
1456   @param nics: list of tuples (ip, mac, mode, link, network) representing
1457       the NICs the instance has
1458   @type disk_template: string
1459   @param disk_template: the disk template of the instance
1460   @type disks: list
1461   @param disks: the list of (size, mode) pairs
1462   @type bep: dict
1463   @param bep: the backend parameters for the instance
1464   @type hvp: dict
1465   @param hvp: the hypervisor parameters for the instance
1466   @type hypervisor_name: string
1467   @param hypervisor_name: the hypervisor for the instance
1468   @type tags: list
1469   @param tags: list of instance tags as strings
1470   @rtype: dict
1471   @return: the hook environment for this instance
1472
1473   """
1474   env = {
1475     "OP_TARGET": name,
1476     "INSTANCE_NAME": name,
1477     "INSTANCE_PRIMARY": primary_node,
1478     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1479     "INSTANCE_OS_TYPE": os_type,
1480     "INSTANCE_STATUS": status,
1481     "INSTANCE_MINMEM": minmem,
1482     "INSTANCE_MAXMEM": maxmem,
1483     # TODO(2.7) remove deprecated "memory" value
1484     "INSTANCE_MEMORY": maxmem,
1485     "INSTANCE_VCPUS": vcpus,
1486     "INSTANCE_DISK_TEMPLATE": disk_template,
1487     "INSTANCE_HYPERVISOR": hypervisor_name,
1488   }
1489   if nics:
1490     nic_count = len(nics)
1491     for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1492       if ip is None:
1493         ip = ""
1494       env["INSTANCE_NIC%d_IP" % idx] = ip
1495       env["INSTANCE_NIC%d_MAC" % idx] = mac
1496       env["INSTANCE_NIC%d_MODE" % idx] = mode
1497       env["INSTANCE_NIC%d_LINK" % idx] = link
1498       if network:
1499         env["INSTANCE_NIC%d_NETWORK" % idx] = net
1500         if netinfo:
1501           nobj = objects.Network.FromDict(netinfo)
1502           if nobj.network:
1503             env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1504           if nobj.gateway:
1505             env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1506           if nobj.network6:
1507             env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1508           if nobj.gateway6:
1509             env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1510           if nobj.mac_prefix:
1511             env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1512           if nobj.network_type:
1513             env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1514           if nobj.tags:
1515             env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1516       if mode == constants.NIC_MODE_BRIDGED:
1517         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1518   else:
1519     nic_count = 0
1520
1521   env["INSTANCE_NIC_COUNT"] = nic_count
1522
1523   if disks:
1524     disk_count = len(disks)
1525     for idx, (size, mode) in enumerate(disks):
1526       env["INSTANCE_DISK%d_SIZE" % idx] = size
1527       env["INSTANCE_DISK%d_MODE" % idx] = mode
1528   else:
1529     disk_count = 0
1530
1531   env["INSTANCE_DISK_COUNT"] = disk_count
1532
1533   if not tags:
1534     tags = []
1535
1536   env["INSTANCE_TAGS"] = " ".join(tags)
1537
1538   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539     for key, value in source.items():
1540       env["INSTANCE_%s_%s" % (kind, key)] = value
1541
1542   return env
1543
1544
1545 def _NICToTuple(lu, nic):
1546   """Build a tupple of nic information.
1547
1548   @type lu:  L{LogicalUnit}
1549   @param lu: the logical unit on whose behalf we execute
1550   @type nic: L{objects.NIC}
1551   @param nic: nic to convert to hooks tuple
1552
1553   """
1554   cluster = lu.cfg.GetClusterInfo()
1555   filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556   mode = filled_params[constants.NIC_MODE]
1557   link = filled_params[constants.NIC_LINK]
1558   netinfo = None
1559   if nic.network:
1560     net_uuid = lu.cfg.LookupNetwork(nic.network)
1561     netinfo = objects.Network.ToDict(lu.cfg.GetNetwork(net_uuid))
1562
1563   return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1564
1565
1566 def _NICListToTuple(lu, nics):
1567   """Build a list of nic information tuples.
1568
1569   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1570   value in LUInstanceQueryData.
1571
1572   @type lu:  L{LogicalUnit}
1573   @param lu: the logical unit on whose behalf we execute
1574   @type nics: list of L{objects.NIC}
1575   @param nics: list of nics to convert to hooks tuples
1576
1577   """
1578   hooks_nics = []
1579   for nic in nics:
1580     hooks_nics.append(_NICToTuple(lu, nic))
1581   return hooks_nics
1582
1583
1584 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1585   """Builds instance related env variables for hooks from an object.
1586
1587   @type lu: L{LogicalUnit}
1588   @param lu: the logical unit on whose behalf we execute
1589   @type instance: L{objects.Instance}
1590   @param instance: the instance for which we should build the
1591       environment
1592   @type override: dict
1593   @param override: dictionary with key/values that will override
1594       our values
1595   @rtype: dict
1596   @return: the hook environment dictionary
1597
1598   """
1599   cluster = lu.cfg.GetClusterInfo()
1600   bep = cluster.FillBE(instance)
1601   hvp = cluster.FillHV(instance)
1602   args = {
1603     "name": instance.name,
1604     "primary_node": instance.primary_node,
1605     "secondary_nodes": instance.secondary_nodes,
1606     "os_type": instance.os,
1607     "status": instance.admin_state,
1608     "maxmem": bep[constants.BE_MAXMEM],
1609     "minmem": bep[constants.BE_MINMEM],
1610     "vcpus": bep[constants.BE_VCPUS],
1611     "nics": _NICListToTuple(lu, instance.nics),
1612     "disk_template": instance.disk_template,
1613     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1614     "bep": bep,
1615     "hvp": hvp,
1616     "hypervisor_name": instance.hypervisor,
1617     "tags": instance.tags,
1618   }
1619   if override:
1620     args.update(override)
1621   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1622
1623
1624 def _AdjustCandidatePool(lu, exceptions):
1625   """Adjust the candidate pool after node operations.
1626
1627   """
1628   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1629   if mod_list:
1630     lu.LogInfo("Promoted nodes to master candidate role: %s",
1631                utils.CommaJoin(node.name for node in mod_list))
1632     for name in mod_list:
1633       lu.context.ReaddNode(name)
1634   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1635   if mc_now > mc_max:
1636     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1637                (mc_now, mc_max))
1638
1639
1640 def _DecideSelfPromotion(lu, exceptions=None):
1641   """Decide whether I should promote myself as a master candidate.
1642
1643   """
1644   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1645   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1646   # the new node will increase mc_max with one, so:
1647   mc_should = min(mc_should + 1, cp_size)
1648   return mc_now < mc_should
1649
1650
1651 def _ComputeViolatingInstances(ipolicy, instances):
1652   """Computes a set of instances who violates given ipolicy.
1653
1654   @param ipolicy: The ipolicy to verify
1655   @type instances: object.Instance
1656   @param instances: List of instances to verify
1657   @return: A frozenset of instance names violating the ipolicy
1658
1659   """
1660   return frozenset([inst.name for inst in instances
1661                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1662
1663
1664 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1665   """Check that the brigdes needed by a list of nics exist.
1666
1667   """
1668   cluster = lu.cfg.GetClusterInfo()
1669   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1670   brlist = [params[constants.NIC_LINK] for params in paramslist
1671             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1672   if brlist:
1673     result = lu.rpc.call_bridges_exist(target_node, brlist)
1674     result.Raise("Error checking bridges on destination node '%s'" %
1675                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1676
1677
1678 def _CheckInstanceBridgesExist(lu, instance, node=None):
1679   """Check that the brigdes needed by an instance exist.
1680
1681   """
1682   if node is None:
1683     node = instance.primary_node
1684   _CheckNicsBridgesExist(lu, instance.nics, node)
1685
1686
1687 def _CheckOSVariant(os_obj, name):
1688   """Check whether an OS name conforms to the os variants specification.
1689
1690   @type os_obj: L{objects.OS}
1691   @param os_obj: OS object to check
1692   @type name: string
1693   @param name: OS name passed by the user, to check for validity
1694
1695   """
1696   variant = objects.OS.GetVariant(name)
1697   if not os_obj.supported_variants:
1698     if variant:
1699       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1700                                  " passed)" % (os_obj.name, variant),
1701                                  errors.ECODE_INVAL)
1702     return
1703   if not variant:
1704     raise errors.OpPrereqError("OS name must include a variant",
1705                                errors.ECODE_INVAL)
1706
1707   if variant not in os_obj.supported_variants:
1708     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1709
1710
1711 def _GetNodeInstancesInner(cfg, fn):
1712   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1713
1714
1715 def _GetNodeInstances(cfg, node_name):
1716   """Returns a list of all primary and secondary instances on a node.
1717
1718   """
1719
1720   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1721
1722
1723 def _GetNodePrimaryInstances(cfg, node_name):
1724   """Returns primary instances on a node.
1725
1726   """
1727   return _GetNodeInstancesInner(cfg,
1728                                 lambda inst: node_name == inst.primary_node)
1729
1730
1731 def _GetNodeSecondaryInstances(cfg, node_name):
1732   """Returns secondary instances on a node.
1733
1734   """
1735   return _GetNodeInstancesInner(cfg,
1736                                 lambda inst: node_name in inst.secondary_nodes)
1737
1738
1739 def _GetStorageTypeArgs(cfg, storage_type):
1740   """Returns the arguments for a storage type.
1741
1742   """
1743   # Special case for file storage
1744   if storage_type == constants.ST_FILE:
1745     # storage.FileStorage wants a list of storage directories
1746     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1747
1748   return []
1749
1750
1751 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1752   faulty = []
1753
1754   for dev in instance.disks:
1755     cfg.SetDiskID(dev, node_name)
1756
1757   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1758                                                                 instance))
1759   result.Raise("Failed to get disk status from node %s" % node_name,
1760                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1761
1762   for idx, bdev_status in enumerate(result.payload):
1763     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1764       faulty.append(idx)
1765
1766   return faulty
1767
1768
1769 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1770   """Check the sanity of iallocator and node arguments and use the
1771   cluster-wide iallocator if appropriate.
1772
1773   Check that at most one of (iallocator, node) is specified. If none is
1774   specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1775   then the LU's opcode's iallocator slot is filled with the cluster-wide
1776   default iallocator.
1777
1778   @type iallocator_slot: string
1779   @param iallocator_slot: the name of the opcode iallocator slot
1780   @type node_slot: string
1781   @param node_slot: the name of the opcode target node slot
1782
1783   """
1784   node = getattr(lu.op, node_slot, None)
1785   ialloc = getattr(lu.op, iallocator_slot, None)
1786   if node == []:
1787     node = None
1788
1789   if node is not None and ialloc is not None:
1790     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1791                                errors.ECODE_INVAL)
1792   elif ((node is None and ialloc is None) or
1793         ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1794     default_iallocator = lu.cfg.GetDefaultIAllocator()
1795     if default_iallocator:
1796       setattr(lu.op, iallocator_slot, default_iallocator)
1797     else:
1798       raise errors.OpPrereqError("No iallocator or node given and no"
1799                                  " cluster-wide default iallocator found;"
1800                                  " please specify either an iallocator or a"
1801                                  " node, or set a cluster-wide default"
1802                                  " iallocator", errors.ECODE_INVAL)
1803
1804
1805 def _GetDefaultIAllocator(cfg, ialloc):
1806   """Decides on which iallocator to use.
1807
1808   @type cfg: L{config.ConfigWriter}
1809   @param cfg: Cluster configuration object
1810   @type ialloc: string or None
1811   @param ialloc: Iallocator specified in opcode
1812   @rtype: string
1813   @return: Iallocator name
1814
1815   """
1816   if not ialloc:
1817     # Use default iallocator
1818     ialloc = cfg.GetDefaultIAllocator()
1819
1820   if not ialloc:
1821     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1822                                " opcode nor as a cluster-wide default",
1823                                errors.ECODE_INVAL)
1824
1825   return ialloc
1826
1827
1828 def _CheckHostnameSane(lu, name):
1829   """Ensures that a given hostname resolves to a 'sane' name.
1830
1831   The given name is required to be a prefix of the resolved hostname,
1832   to prevent accidental mismatches.
1833
1834   @param lu: the logical unit on behalf of which we're checking
1835   @param name: the name we should resolve and check
1836   @return: the resolved hostname object
1837
1838   """
1839   hostname = netutils.GetHostname(name=name)
1840   if hostname.name != name:
1841     lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1842   if not utils.MatchNameComponent(name, [hostname.name]):
1843     raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1844                                 " same as given hostname '%s'") %
1845                                 (hostname.name, name), errors.ECODE_INVAL)
1846   return hostname
1847
1848
1849 class LUClusterPostInit(LogicalUnit):
1850   """Logical unit for running hooks after cluster initialization.
1851
1852   """
1853   HPATH = "cluster-init"
1854   HTYPE = constants.HTYPE_CLUSTER
1855
1856   def BuildHooksEnv(self):
1857     """Build hooks env.
1858
1859     """
1860     return {
1861       "OP_TARGET": self.cfg.GetClusterName(),
1862       }
1863
1864   def BuildHooksNodes(self):
1865     """Build hooks nodes.
1866
1867     """
1868     return ([], [self.cfg.GetMasterNode()])
1869
1870   def Exec(self, feedback_fn):
1871     """Nothing to do.
1872
1873     """
1874     return True
1875
1876
1877 class LUClusterDestroy(LogicalUnit):
1878   """Logical unit for destroying the cluster.
1879
1880   """
1881   HPATH = "cluster-destroy"
1882   HTYPE = constants.HTYPE_CLUSTER
1883
1884   def BuildHooksEnv(self):
1885     """Build hooks env.
1886
1887     """
1888     return {
1889       "OP_TARGET": self.cfg.GetClusterName(),
1890       }
1891
1892   def BuildHooksNodes(self):
1893     """Build hooks nodes.
1894
1895     """
1896     return ([], [])
1897
1898   def CheckPrereq(self):
1899     """Check prerequisites.
1900
1901     This checks whether the cluster is empty.
1902
1903     Any errors are signaled by raising errors.OpPrereqError.
1904
1905     """
1906     master = self.cfg.GetMasterNode()
1907
1908     nodelist = self.cfg.GetNodeList()
1909     if len(nodelist) != 1 or nodelist[0] != master:
1910       raise errors.OpPrereqError("There are still %d node(s) in"
1911                                  " this cluster." % (len(nodelist) - 1),
1912                                  errors.ECODE_INVAL)
1913     instancelist = self.cfg.GetInstanceList()
1914     if instancelist:
1915       raise errors.OpPrereqError("There are still %d instance(s) in"
1916                                  " this cluster." % len(instancelist),
1917                                  errors.ECODE_INVAL)
1918
1919   def Exec(self, feedback_fn):
1920     """Destroys the cluster.
1921
1922     """
1923     master_params = self.cfg.GetMasterNetworkParameters()
1924
1925     # Run post hooks on master node before it's removed
1926     _RunPostHook(self, master_params.name)
1927
1928     ems = self.cfg.GetUseExternalMipScript()
1929     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1930                                                      master_params, ems)
1931     if result.fail_msg:
1932       self.LogWarning("Error disabling the master IP address: %s",
1933                       result.fail_msg)
1934
1935     return master_params.name
1936
1937
1938 def _VerifyCertificate(filename):
1939   """Verifies a certificate for L{LUClusterVerifyConfig}.
1940
1941   @type filename: string
1942   @param filename: Path to PEM file
1943
1944   """
1945   try:
1946     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1947                                            utils.ReadFile(filename))
1948   except Exception, err: # pylint: disable=W0703
1949     return (LUClusterVerifyConfig.ETYPE_ERROR,
1950             "Failed to load X509 certificate %s: %s" % (filename, err))
1951
1952   (errcode, msg) = \
1953     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1954                                 constants.SSL_CERT_EXPIRATION_ERROR)
1955
1956   if msg:
1957     fnamemsg = "While verifying %s: %s" % (filename, msg)
1958   else:
1959     fnamemsg = None
1960
1961   if errcode is None:
1962     return (None, fnamemsg)
1963   elif errcode == utils.CERT_WARNING:
1964     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1965   elif errcode == utils.CERT_ERROR:
1966     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1967
1968   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1969
1970
1971 def _GetAllHypervisorParameters(cluster, instances):
1972   """Compute the set of all hypervisor parameters.
1973
1974   @type cluster: L{objects.Cluster}
1975   @param cluster: the cluster object
1976   @param instances: list of L{objects.Instance}
1977   @param instances: additional instances from which to obtain parameters
1978   @rtype: list of (origin, hypervisor, parameters)
1979   @return: a list with all parameters found, indicating the hypervisor they
1980        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1981
1982   """
1983   hvp_data = []
1984
1985   for hv_name in cluster.enabled_hypervisors:
1986     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987
1988   for os_name, os_hvp in cluster.os_hvp.items():
1989     for hv_name, hv_params in os_hvp.items():
1990       if hv_params:
1991         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1992         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993
1994   # TODO: collapse identical parameter values in a single one
1995   for instance in instances:
1996     if instance.hvparams:
1997       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1998                        cluster.FillHV(instance)))
1999
2000   return hvp_data
2001
2002
2003 class _VerifyErrors(object):
2004   """Mix-in for cluster/group verify LUs.
2005
2006   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2007   self.op and self._feedback_fn to be available.)
2008
2009   """
2010
2011   ETYPE_FIELD = "code"
2012   ETYPE_ERROR = "ERROR"
2013   ETYPE_WARNING = "WARNING"
2014
2015   def _Error(self, ecode, item, msg, *args, **kwargs):
2016     """Format an error message.
2017
2018     Based on the opcode's error_codes parameter, either format a
2019     parseable error code, or a simpler error string.
2020
2021     This must be called only from Exec and functions called from Exec.
2022
2023     """
2024     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2025     itype, etxt, _ = ecode
2026     # If the error code is in the list of ignored errors, demote the error to a
2027     # warning
2028     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
2029       ltype = self.ETYPE_WARNING
2030     # first complete the msg
2031     if args:
2032       msg = msg % args
2033     # then format the whole message
2034     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2035       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2036     else:
2037       if item:
2038         item = " " + item
2039       else:
2040         item = ""
2041       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2042     # and finally report it via the feedback_fn
2043     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
2044     # do not mark the operation as failed for WARN cases only
2045     if ltype == self.ETYPE_ERROR:
2046       self.bad = True
2047
2048   def _ErrorIf(self, cond, *args, **kwargs):
2049     """Log an error message if the passed condition is True.
2050
2051     """
2052     if (bool(cond)
2053         or self.op.debug_simulate_errors): # pylint: disable=E1101
2054       self._Error(*args, **kwargs)
2055
2056
2057 class LUClusterVerify(NoHooksLU):
2058   """Submits all jobs necessary to verify the cluster.
2059
2060   """
2061   REQ_BGL = False
2062
2063   def ExpandNames(self):
2064     self.needed_locks = {}
2065
2066   def Exec(self, feedback_fn):
2067     jobs = []
2068
2069     if self.op.group_name:
2070       groups = [self.op.group_name]
2071       depends_fn = lambda: None
2072     else:
2073       groups = self.cfg.GetNodeGroupList()
2074
2075       # Verify global configuration
2076       jobs.append([
2077         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2078         ])
2079
2080       # Always depend on global verification
2081       depends_fn = lambda: [(-len(jobs), [])]
2082
2083     jobs.extend(
2084       [opcodes.OpClusterVerifyGroup(group_name=group,
2085                                     ignore_errors=self.op.ignore_errors,
2086                                     depends=depends_fn())]
2087       for group in groups)
2088
2089     # Fix up all parameters
2090     for op in itertools.chain(*jobs): # pylint: disable=W0142
2091       op.debug_simulate_errors = self.op.debug_simulate_errors
2092       op.verbose = self.op.verbose
2093       op.error_codes = self.op.error_codes
2094       try:
2095         op.skip_checks = self.op.skip_checks
2096       except AttributeError:
2097         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2098
2099     return ResultWithJobs(jobs)
2100
2101
2102 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2103   """Verifies the cluster config.
2104
2105   """
2106   REQ_BGL = False
2107
2108   def _VerifyHVP(self, hvp_data):
2109     """Verifies locally the syntax of the hypervisor parameters.
2110
2111     """
2112     for item, hv_name, hv_params in hvp_data:
2113       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2114              (item, hv_name))
2115       try:
2116         hv_class = hypervisor.GetHypervisorClass(hv_name)
2117         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2118         hv_class.CheckParameterSyntax(hv_params)
2119       except errors.GenericError, err:
2120         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2121
2122   def ExpandNames(self):
2123     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2124     self.share_locks = _ShareAll()
2125
2126   def CheckPrereq(self):
2127     """Check prerequisites.
2128
2129     """
2130     # Retrieve all information
2131     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2132     self.all_node_info = self.cfg.GetAllNodesInfo()
2133     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2134
2135   def Exec(self, feedback_fn):
2136     """Verify integrity of cluster, performing various test on nodes.
2137
2138     """
2139     self.bad = False
2140     self._feedback_fn = feedback_fn
2141
2142     feedback_fn("* Verifying cluster config")
2143
2144     for msg in self.cfg.VerifyConfig():
2145       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2146
2147     feedback_fn("* Verifying cluster certificate files")
2148
2149     for cert_filename in pathutils.ALL_CERT_FILES:
2150       (errcode, msg) = _VerifyCertificate(cert_filename)
2151       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2152
2153     feedback_fn("* Verifying hypervisor parameters")
2154
2155     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2156                                                 self.all_inst_info.values()))
2157
2158     feedback_fn("* Verifying all nodes belong to an existing group")
2159
2160     # We do this verification here because, should this bogus circumstance
2161     # occur, it would never be caught by VerifyGroup, which only acts on
2162     # nodes/instances reachable from existing node groups.
2163
2164     dangling_nodes = set(node.name for node in self.all_node_info.values()
2165                          if node.group not in self.all_group_info)
2166
2167     dangling_instances = {}
2168     no_node_instances = []
2169
2170     for inst in self.all_inst_info.values():
2171       if inst.primary_node in dangling_nodes:
2172         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2173       elif inst.primary_node not in self.all_node_info:
2174         no_node_instances.append(inst.name)
2175
2176     pretty_dangling = [
2177         "%s (%s)" %
2178         (node.name,
2179          utils.CommaJoin(dangling_instances.get(node.name,
2180                                                 ["no instances"])))
2181         for node in dangling_nodes]
2182
2183     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2184                   None,
2185                   "the following nodes (and their instances) belong to a non"
2186                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2187
2188     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2189                   None,
2190                   "the following instances have a non-existing primary-node:"
2191                   " %s", utils.CommaJoin(no_node_instances))
2192
2193     return not self.bad
2194
2195
2196 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2197   """Verifies the status of a node group.
2198
2199   """
2200   HPATH = "cluster-verify"
2201   HTYPE = constants.HTYPE_CLUSTER
2202   REQ_BGL = False
2203
2204   _HOOKS_INDENT_RE = re.compile("^", re.M)
2205
2206   class NodeImage(object):
2207     """A class representing the logical and physical status of a node.
2208
2209     @type name: string
2210     @ivar name: the node name to which this object refers
2211     @ivar volumes: a structure as returned from
2212         L{ganeti.backend.GetVolumeList} (runtime)
2213     @ivar instances: a list of running instances (runtime)
2214     @ivar pinst: list of configured primary instances (config)
2215     @ivar sinst: list of configured secondary instances (config)
2216     @ivar sbp: dictionary of {primary-node: list of instances} for all
2217         instances for which this node is secondary (config)
2218     @ivar mfree: free memory, as reported by hypervisor (runtime)
2219     @ivar dfree: free disk, as reported by the node (runtime)
2220     @ivar offline: the offline status (config)
2221     @type rpc_fail: boolean
2222     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2223         not whether the individual keys were correct) (runtime)
2224     @type lvm_fail: boolean
2225     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2226     @type hyp_fail: boolean
2227     @ivar hyp_fail: whether the RPC call didn't return the instance list
2228     @type ghost: boolean
2229     @ivar ghost: whether this is a known node or not (config)
2230     @type os_fail: boolean
2231     @ivar os_fail: whether the RPC call didn't return valid OS data
2232     @type oslist: list
2233     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2234     @type vm_capable: boolean
2235     @ivar vm_capable: whether the node can host instances
2236     @type pv_min: float
2237     @ivar pv_min: size in MiB of the smallest PVs
2238     @type pv_max: float
2239     @ivar pv_max: size in MiB of the biggest PVs
2240
2241     """
2242     def __init__(self, offline=False, name=None, vm_capable=True):
2243       self.name = name
2244       self.volumes = {}
2245       self.instances = []
2246       self.pinst = []
2247       self.sinst = []
2248       self.sbp = {}
2249       self.mfree = 0
2250       self.dfree = 0
2251       self.offline = offline
2252       self.vm_capable = vm_capable
2253       self.rpc_fail = False
2254       self.lvm_fail = False
2255       self.hyp_fail = False
2256       self.ghost = False
2257       self.os_fail = False
2258       self.oslist = {}
2259       self.pv_min = None
2260       self.pv_max = None
2261
2262   def ExpandNames(self):
2263     # This raises errors.OpPrereqError on its own:
2264     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265
2266     # Get instances in node group; this is unsafe and needs verification later
2267     inst_names = \
2268       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269
2270     self.needed_locks = {
2271       locking.LEVEL_INSTANCE: inst_names,
2272       locking.LEVEL_NODEGROUP: [self.group_uuid],
2273       locking.LEVEL_NODE: [],
2274
2275       # This opcode is run by watcher every five minutes and acquires all nodes
2276       # for a group. It doesn't run for a long time, so it's better to acquire
2277       # the node allocation lock as well.
2278       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2279       }
2280
2281     self.share_locks = _ShareAll()
2282
2283   def DeclareLocks(self, level):
2284     if level == locking.LEVEL_NODE:
2285       # Get members of node group; this is unsafe and needs verification later
2286       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287
2288       all_inst_info = self.cfg.GetAllInstancesInfo()
2289
2290       # In Exec(), we warn about mirrored instances that have primary and
2291       # secondary living in separate node groups. To fully verify that
2292       # volumes for these instances are healthy, we will need to do an
2293       # extra call to their secondaries. We ensure here those nodes will
2294       # be locked.
2295       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2296         # Important: access only the instances whose lock is owned
2297         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2298           nodes.update(all_inst_info[inst].secondary_nodes)
2299
2300       self.needed_locks[locking.LEVEL_NODE] = nodes
2301
2302   def CheckPrereq(self):
2303     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2304     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305
2306     group_nodes = set(self.group_info.members)
2307     group_instances = \
2308       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2309
2310     unlocked_nodes = \
2311         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312
2313     unlocked_instances = \
2314         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2315
2316     if unlocked_nodes:
2317       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2318                                  utils.CommaJoin(unlocked_nodes),
2319                                  errors.ECODE_STATE)
2320
2321     if unlocked_instances:
2322       raise errors.OpPrereqError("Missing lock for instances: %s" %
2323                                  utils.CommaJoin(unlocked_instances),
2324                                  errors.ECODE_STATE)
2325
2326     self.all_node_info = self.cfg.GetAllNodesInfo()
2327     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328
2329     self.my_node_names = utils.NiceSort(group_nodes)
2330     self.my_inst_names = utils.NiceSort(group_instances)
2331
2332     self.my_node_info = dict((name, self.all_node_info[name])
2333                              for name in self.my_node_names)
2334
2335     self.my_inst_info = dict((name, self.all_inst_info[name])
2336                              for name in self.my_inst_names)
2337
2338     # We detect here the nodes that will need the extra RPC calls for verifying
2339     # split LV volumes; they should be locked.
2340     extra_lv_nodes = set()
2341
2342     for inst in self.my_inst_info.values():
2343       if inst.disk_template in constants.DTS_INT_MIRROR:
2344         for nname in inst.all_nodes:
2345           if self.all_node_info[nname].group != self.group_uuid:
2346             extra_lv_nodes.add(nname)
2347
2348     unlocked_lv_nodes = \
2349         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350
2351     if unlocked_lv_nodes:
2352       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2353                                  utils.CommaJoin(unlocked_lv_nodes),
2354                                  errors.ECODE_STATE)
2355     self.extra_lv_nodes = list(extra_lv_nodes)
2356
2357   def _VerifyNode(self, ninfo, nresult):
2358     """Perform some basic validation on data returned from a node.
2359
2360       - check the result data structure is well formed and has all the
2361         mandatory fields
2362       - check ganeti version
2363
2364     @type ninfo: L{objects.Node}
2365     @param ninfo: the node to check
2366     @param nresult: the results from the node
2367     @rtype: boolean
2368     @return: whether overall this call was successful (and we can expect
2369          reasonable values in the respose)
2370
2371     """
2372     node = ninfo.name
2373     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374
2375     # main result, nresult should be a non-empty dict
2376     test = not nresult or not isinstance(nresult, dict)
2377     _ErrorIf(test, constants.CV_ENODERPC, node,
2378                   "unable to verify node: no data returned")
2379     if test:
2380       return False
2381
2382     # compares ganeti version
2383     local_version = constants.PROTOCOL_VERSION
2384     remote_version = nresult.get("version", None)
2385     test = not (remote_version and
2386                 isinstance(remote_version, (list, tuple)) and
2387                 len(remote_version) == 2)
2388     _ErrorIf(test, constants.CV_ENODERPC, node,
2389              "connection to node returned invalid data")
2390     if test:
2391       return False
2392
2393     test = local_version != remote_version[0]
2394     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2395              "incompatible protocol versions: master %s,"
2396              " node %s", local_version, remote_version[0])
2397     if test:
2398       return False
2399
2400     # node seems compatible, we can actually try to look into its results
2401
2402     # full package version
2403     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2404                   constants.CV_ENODEVERSION, node,
2405                   "software version mismatch: master %s, node %s",
2406                   constants.RELEASE_VERSION, remote_version[1],
2407                   code=self.ETYPE_WARNING)
2408
2409     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2410     if ninfo.vm_capable and isinstance(hyp_result, dict):
2411       for hv_name, hv_result in hyp_result.iteritems():
2412         test = hv_result is not None
2413         _ErrorIf(test, constants.CV_ENODEHV, node,
2414                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415
2416     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2417     if ninfo.vm_capable and isinstance(hvp_result, list):
2418       for item, hv_name, hv_result in hvp_result:
2419         _ErrorIf(True, constants.CV_ENODEHV, node,
2420                  "hypervisor %s parameter verify failure (source %s): %s",
2421                  hv_name, item, hv_result)
2422
2423     test = nresult.get(constants.NV_NODESETUP,
2424                        ["Missing NODESETUP results"])
2425     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2426              "; ".join(test))
2427
2428     return True
2429
2430   def _VerifyNodeTime(self, ninfo, nresult,
2431                       nvinfo_starttime, nvinfo_endtime):
2432     """Check the node time.
2433
2434     @type ninfo: L{objects.Node}
2435     @param ninfo: the node to check
2436     @param nresult: the remote results for the node
2437     @param nvinfo_starttime: the start time of the RPC call
2438     @param nvinfo_endtime: the end time of the RPC call
2439
2440     """
2441     node = ninfo.name
2442     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443
2444     ntime = nresult.get(constants.NV_TIME, None)
2445     try:
2446       ntime_merged = utils.MergeTime(ntime)
2447     except (ValueError, TypeError):
2448       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2449       return
2450
2451     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2452       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2453     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2454       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2455     else:
2456       ntime_diff = None
2457
2458     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2459              "Node time diverges by at least %s from master node time",
2460              ntime_diff)
2461
2462   def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2463     """Check the node LVM results and update info for cross-node checks.
2464
2465     @type ninfo: L{objects.Node}
2466     @param ninfo: the node to check
2467     @param nresult: the remote results for the node
2468     @param vg_name: the configured VG name
2469     @type nimg: L{NodeImage}
2470     @param nimg: node image
2471
2472     """
2473     if vg_name is None:
2474       return
2475
2476     node = ninfo.name
2477     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478
2479     # checks vg existence and size > 20G
2480     vglist = nresult.get(constants.NV_VGLIST, None)
2481     test = not vglist
2482     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2483     if not test:
2484       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2485                                             constants.MIN_VG_SIZE)
2486       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2487
2488     # Check PVs
2489     (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490     for em in errmsgs:
2491       self._Error(constants.CV_ENODELVM, node, em)
2492     if pvminmax is not None:
2493       (nimg.pv_min, nimg.pv_max) = pvminmax
2494
2495   def _VerifyGroupLVM(self, node_image, vg_name):
2496     """Check cross-node consistency in LVM.
2497
2498     @type node_image: dict
2499     @param node_image: info about nodes, mapping from node to names to
2500       L{NodeImage} objects
2501     @param vg_name: the configured VG name
2502
2503     """
2504     if vg_name is None:
2505       return
2506
2507     # Only exlcusive storage needs this kind of checks
2508     if not self._exclusive_storage:
2509       return
2510
2511     # exclusive_storage wants all PVs to have the same size (approximately),
2512     # if the smallest and the biggest ones are okay, everything is fine.
2513     # pv_min is None iff pv_max is None
2514     vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2515     if not vals:
2516       return
2517     (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2518     (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2519     bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2520     self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2521                   "PV sizes differ too much in the group; smallest (%s MB) is"
2522                   " on %s, biggest (%s MB) is on %s",
2523                   pvmin, minnode, pvmax, maxnode)
2524
2525   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2526     """Check the node bridges.
2527
2528     @type ninfo: L{objects.Node}
2529     @param ninfo: the node to check
2530     @param nresult: the remote results for the node
2531     @param bridges: the expected list of bridges
2532
2533     """
2534     if not bridges:
2535       return
2536
2537     node = ninfo.name
2538     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539
2540     missing = nresult.get(constants.NV_BRIDGES, None)
2541     test = not isinstance(missing, list)
2542     _ErrorIf(test, constants.CV_ENODENET, node,
2543              "did not return valid bridge information")
2544     if not test:
2545       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2546                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2547
2548   def _VerifyNodeUserScripts(self, ninfo, nresult):
2549     """Check the results of user scripts presence and executability on the node
2550
2551     @type ninfo: L{objects.Node}
2552     @param ninfo: the node to check
2553     @param nresult: the remote results for the node
2554
2555     """
2556     node = ninfo.name
2557
2558     test = not constants.NV_USERSCRIPTS in nresult
2559     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2560                   "did not return user scripts information")
2561
2562     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563     if not test:
2564       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2565                     "user scripts not present or not executable: %s" %
2566                     utils.CommaJoin(sorted(broken_scripts)))
2567
2568   def _VerifyNodeNetwork(self, ninfo, nresult):
2569     """Check the node network connectivity results.
2570
2571     @type ninfo: L{objects.Node}
2572     @param ninfo: the node to check
2573     @param nresult: the remote results for the node
2574
2575     """
2576     node = ninfo.name
2577     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578
2579     test = constants.NV_NODELIST not in nresult
2580     _ErrorIf(test, constants.CV_ENODESSH, node,
2581              "node hasn't returned node ssh connectivity data")
2582     if not test:
2583       if nresult[constants.NV_NODELIST]:
2584         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2585           _ErrorIf(True, constants.CV_ENODESSH, node,
2586                    "ssh communication with node '%s': %s", a_node, a_msg)
2587
2588     test = constants.NV_NODENETTEST not in nresult
2589     _ErrorIf(test, constants.CV_ENODENET, node,
2590              "node hasn't returned node tcp connectivity data")
2591     if not test:
2592       if nresult[constants.NV_NODENETTEST]:
2593         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594         for anode in nlist:
2595           _ErrorIf(True, constants.CV_ENODENET, node,
2596                    "tcp communication with node '%s': %s",
2597                    anode, nresult[constants.NV_NODENETTEST][anode])
2598
2599     test = constants.NV_MASTERIP not in nresult
2600     _ErrorIf(test, constants.CV_ENODENET, node,
2601              "node hasn't returned node master IP reachability data")
2602     if not test:
2603       if not nresult[constants.NV_MASTERIP]:
2604         if node == self.master_node:
2605           msg = "the master node cannot reach the master IP (not configured?)"
2606         else:
2607           msg = "cannot reach the master IP"
2608         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2609
2610   def _VerifyInstance(self, instance, inst_config, node_image,
2611                       diskstatus):
2612     """Verify an instance.
2613
2614     This function checks to see if the required block devices are
2615     available on the instance's node, and that the nodes are in the correct
2616     state.
2617
2618     """
2619     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620     pnode = inst_config.primary_node
2621     pnode_img = node_image[pnode]
2622     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623
2624     node_vol_should = {}
2625     inst_config.MapLVsByNode(node_vol_should)
2626
2627     cluster = self.cfg.GetClusterInfo()
2628     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629                                                             self.group_info)
2630     err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2631     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2632              code=self.ETYPE_WARNING)
2633
2634     for node in node_vol_should:
2635       n_img = node_image[node]
2636       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2637         # ignore missing volumes on offline or broken nodes
2638         continue
2639       for volume in node_vol_should[node]:
2640         test = volume not in n_img.volumes
2641         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2642                  "volume %s missing on node %s", volume, node)
2643
2644     if inst_config.admin_state == constants.ADMINST_UP:
2645       test = instance not in pnode_img.instances and not pnode_img.offline
2646       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2647                "instance not running on its primary node %s",
2648                pnode)
2649       _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2650                "instance is marked as running and lives on offline node %s",
2651                pnode)
2652
2653     diskdata = [(nname, success, status, idx)
2654                 for (nname, disks) in diskstatus.items()
2655                 for idx, (success, status) in enumerate(disks)]
2656
2657     for nname, success, bdev_status, idx in diskdata:
2658       # the 'ghost node' construction in Exec() ensures that we have a
2659       # node here
2660       snode = node_image[nname]
2661       bad_snode = snode.ghost or snode.offline
2662       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2663                not success and not bad_snode,
2664                constants.CV_EINSTANCEFAULTYDISK, instance,
2665                "couldn't retrieve status for disk/%s on %s: %s",
2666                idx, nname, bdev_status)
2667       _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2668                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2669                constants.CV_EINSTANCEFAULTYDISK, instance,
2670                "disk/%s on %s is faulty", idx, nname)
2671
2672     _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2673              constants.CV_ENODERPC, pnode, "instance %s, connection to"
2674              " primary node failed", instance)
2675
2676     _ErrorIf(len(inst_config.secondary_nodes) > 1,
2677              constants.CV_EINSTANCELAYOUT,
2678              instance, "instance has multiple secondary nodes: %s",
2679              utils.CommaJoin(inst_config.secondary_nodes),
2680              code=self.ETYPE_WARNING)
2681
2682     if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2683       # Disk template not compatible with exclusive_storage: no instance
2684       # node should have the flag set
2685       es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2686                                                      inst_config.all_nodes)
2687       es_nodes = [n for (n, es) in es_flags.items()
2688                   if es]
2689       _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2690                "instance has template %s, which is not supported on nodes"
2691                " that have exclusive storage set: %s",
2692                inst_config.disk_template, utils.CommaJoin(es_nodes))
2693
2694     if inst_config.disk_template in constants.DTS_INT_MIRROR:
2695       instance_nodes = utils.NiceSort(inst_config.all_nodes)
2696       instance_groups = {}
2697
2698       for node in instance_nodes:
2699         instance_groups.setdefault(self.all_node_info[node].group,
2700                                    []).append(node)
2701
2702       pretty_list = [
2703         "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2704         # Sort so that we always list the primary node first.
2705         for group, nodes in sorted(instance_groups.items(),
2706                                    key=lambda (_, nodes): pnode in nodes,
2707                                    reverse=True)]
2708
2709       self._ErrorIf(len(instance_groups) > 1,
2710                     constants.CV_EINSTANCESPLITGROUPS,
2711                     instance, "instance has primary and secondary nodes in"
2712                     " different groups: %s", utils.CommaJoin(pretty_list),
2713                     code=self.ETYPE_WARNING)
2714
2715     inst_nodes_offline = []
2716     for snode in inst_config.secondary_nodes:
2717       s_img = node_image[snode]
2718       _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2719                snode, "instance %s, connection to secondary node failed",
2720                instance)
2721
2722       if s_img.offline:
2723         inst_nodes_offline.append(snode)
2724
2725     # warn that the instance lives on offline nodes
2726     _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2727              "instance has offline secondary node(s) %s",
2728              utils.CommaJoin(inst_nodes_offline))
2729     # ... or ghost/non-vm_capable nodes
2730     for node in inst_config.all_nodes:
2731       _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2732                instance, "instance lives on ghost node %s", node)
2733       _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2734                instance, "instance lives on non-vm_capable node %s", node)
2735
2736   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2737     """Verify if there are any unknown volumes in the cluster.
2738
2739     The .os, .swap and backup volumes are ignored. All other volumes are
2740     reported as unknown.
2741
2742     @type reserved: L{ganeti.utils.FieldSet}
2743     @param reserved: a FieldSet of reserved volume names
2744
2745     """
2746     for node, n_img in node_image.items():
2747       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2748           self.all_node_info[node].group != self.group_uuid):
2749         # skip non-healthy nodes
2750         continue
2751       for volume in n_img.volumes:
2752         test = ((node not in node_vol_should or
2753                 volume not in node_vol_should[node]) and
2754                 not reserved.Matches(volume))
2755         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2756                       "volume %s is unknown", volume)
2757
2758   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2759     """Verify N+1 Memory Resilience.
2760
2761     Check that if one single node dies we can still start all the
2762     instances it was primary for.
2763
2764     """
2765     cluster_info = self.cfg.GetClusterInfo()
2766     for node, n_img in node_image.items():
2767       # This code checks that every node which is now listed as
2768       # secondary has enough memory to host all instances it is
2769       # supposed to should a single other node in the cluster fail.
2770       # FIXME: not ready for failover to an arbitrary node
2771       # FIXME: does not support file-backed instances
2772       # WARNING: we currently take into account down instances as well
2773       # as up ones, considering that even if they're down someone
2774       # might want to start them even in the event of a node failure.
2775       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2776         # we're skipping nodes marked offline and nodes in other groups from
2777         # the N+1 warning, since most likely we don't have good memory
2778         # infromation from them; we already list instances living on such
2779         # nodes, and that's enough warning
2780         continue
2781       #TODO(dynmem): also consider ballooning out other instances
2782       for prinode, instances in n_img.sbp.items():
2783         needed_mem = 0
2784         for instance in instances:
2785           bep = cluster_info.FillBE(instance_cfg[instance])
2786           if bep[constants.BE_AUTO_BALANCE]:
2787             needed_mem += bep[constants.BE_MINMEM]
2788         test = n_img.mfree < needed_mem
2789         self._ErrorIf(test, constants.CV_ENODEN1, node,
2790                       "not enough memory to accomodate instance failovers"
2791                       " should node %s fail (%dMiB needed, %dMiB available)",
2792                       prinode, needed_mem, n_img.mfree)
2793
2794   @classmethod
2795   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2796                    (files_all, files_opt, files_mc, files_vm)):
2797     """Verifies file checksums collected from all nodes.
2798
2799     @param errorif: Callback for reporting errors
2800     @param nodeinfo: List of L{objects.Node} objects
2801     @param master_node: Name of master node
2802     @param all_nvinfo: RPC results
2803
2804     """
2805     # Define functions determining which nodes to consider for a file
2806     files2nodefn = [
2807       (files_all, None),
2808       (files_mc, lambda node: (node.master_candidate or
2809                                node.name == master_node)),
2810       (files_vm, lambda node: node.vm_capable),
2811       ]
2812
2813     # Build mapping from filename to list of nodes which should have the file
2814     nodefiles = {}
2815     for (files, fn) in files2nodefn:
2816       if fn is None:
2817         filenodes = nodeinfo
2818       else:
2819         filenodes = filter(fn, nodeinfo)
2820       nodefiles.update((filename,
2821                         frozenset(map(operator.attrgetter("name"), filenodes)))
2822                        for filename in files)
2823
2824     assert set(nodefiles) == (files_all | files_mc | files_vm)
2825
2826     fileinfo = dict((filename, {}) for filename in nodefiles)
2827     ignore_nodes = set()
2828
2829     for node in nodeinfo:
2830       if node.offline:
2831         ignore_nodes.add(node.name)
2832         continue
2833
2834       nresult = all_nvinfo[node.name]
2835
2836       if nresult.fail_msg or not nresult.payload:
2837         node_files = None
2838       else:
2839         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2840         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2841                           for (key, value) in fingerprints.items())
2842         del fingerprints
2843
2844       test = not (node_files and isinstance(node_files, dict))
2845       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2846               "Node did not return file checksum data")
2847       if test:
2848         ignore_nodes.add(node.name)
2849         continue
2850
2851       # Build per-checksum mapping from filename to nodes having it
2852       for (filename, checksum) in node_files.items():
2853         assert filename in nodefiles
2854         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855
2856     for (filename, checksums) in fileinfo.items():
2857       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858
2859       # Nodes having the file
2860       with_file = frozenset(node_name
2861                             for nodes in fileinfo[filename].values()
2862                             for node_name in nodes) - ignore_nodes
2863
2864       expected_nodes = nodefiles[filename] - ignore_nodes
2865
2866       # Nodes missing file
2867       missing_file = expected_nodes - with_file
2868
2869       if filename in files_opt:
2870         # All or no nodes
2871         errorif(missing_file and missing_file != expected_nodes,
2872                 constants.CV_ECLUSTERFILECHECK, None,
2873                 "File %s is optional, but it must exist on all or no"
2874                 " nodes (not found on %s)",
2875                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876       else:
2877         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2878                 "File %s is missing from node(s) %s", filename,
2879                 utils.CommaJoin(utils.NiceSort(missing_file)))
2880
2881         # Warn if a node has a file it shouldn't
2882         unexpected = with_file - expected_nodes
2883         errorif(unexpected,
2884                 constants.CV_ECLUSTERFILECHECK, None,
2885                 "File %s should not exist on node(s) %s",
2886                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887
2888       # See if there are multiple versions of the file
2889       test = len(checksums) > 1
2890       if test:
2891         variants = ["variant %s on %s" %
2892                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2893                     for (idx, (checksum, nodes)) in
2894                       enumerate(sorted(checksums.items()))]
2895       else:
2896         variants = []
2897
2898       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2899               "File %s found with %s different checksums (%s)",
2900               filename, len(checksums), "; ".join(variants))
2901
2902   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903                       drbd_map):
2904     """Verifies and the node DRBD status.
2905
2906     @type ninfo: L{objects.Node}
2907     @param ninfo: the node to check
2908     @param nresult: the remote results for the node
2909     @param instanceinfo: the dict of instances
2910     @param drbd_helper: the configured DRBD usermode helper
2911     @param drbd_map: the DRBD map as returned by
2912         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2913
2914     """
2915     node = ninfo.name
2916     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2917
2918     if drbd_helper:
2919       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2920       test = (helper_result is None)
2921       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2922                "no drbd usermode helper returned")
2923       if helper_result:
2924         status, payload = helper_result
2925         test = not status
2926         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2927                  "drbd usermode helper check unsuccessful: %s", payload)
2928         test = status and (payload != drbd_helper)
2929         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2930                  "wrong drbd usermode helper: %s", payload)
2931
2932     # compute the DRBD minors
2933     node_drbd = {}
2934     for minor, instance in drbd_map[node].items():
2935       test = instance not in instanceinfo
2936       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2937                "ghost instance '%s' in temporary DRBD map", instance)
2938         # ghost instance should not be running, but otherwise we
2939         # don't give double warnings (both ghost instance and
2940         # unallocated minor in use)
2941       if test:
2942         node_drbd[minor] = (instance, False)
2943       else:
2944         instance = instanceinfo[instance]
2945         node_drbd[minor] = (instance.name,
2946                             instance.admin_state == constants.ADMINST_UP)
2947
2948     # and now check them
2949     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2950     test = not isinstance(used_minors, (tuple, list))
2951     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2952              "cannot parse drbd status file: %s", str(used_minors))
2953     if test:
2954       # we cannot check drbd status
2955       return
2956
2957     for minor, (iname, must_exist) in node_drbd.items():
2958       test = minor not in used_minors and must_exist
2959       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2960                "drbd minor %d of instance %s is not active", minor, iname)
2961     for minor in used_minors:
2962       test = minor not in node_drbd
2963       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964                "unallocated drbd minor %d is in use", minor)
2965
2966   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2967     """Builds the node OS structures.
2968
2969     @type ninfo: L{objects.Node}
2970     @param ninfo: the node to check
2971     @param nresult: the remote results for the node
2972     @param nimg: the node image object
2973
2974     """
2975     node = ninfo.name
2976     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977
2978     remote_os = nresult.get(constants.NV_OSLIST, None)
2979     test = (not isinstance(remote_os, list) or
2980             not compat.all(isinstance(v, list) and len(v) == 7
2981                            for v in remote_os))
2982
2983     _ErrorIf(test, constants.CV_ENODEOS, node,
2984              "node hasn't returned valid OS data")
2985
2986     nimg.os_fail = test
2987
2988     if test:
2989       return
2990
2991     os_dict = {}
2992
2993     for (name, os_path, status, diagnose,
2994          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995
2996       if name not in os_dict:
2997         os_dict[name] = []
2998
2999       # parameters is a list of lists instead of list of tuples due to
3000       # JSON lacking a real tuple type, fix it:
3001       parameters = [tuple(v) for v in parameters]
3002       os_dict[name].append((os_path, status, diagnose,
3003                             set(variants), set(parameters), set(api_ver)))
3004
3005     nimg.oslist = os_dict
3006
3007   def _VerifyNodeOS(self, ninfo, nimg, base):
3008     """Verifies the node OS list.
3009
3010     @type ninfo: L{objects.Node}
3011     @param ninfo: the node to check
3012     @param nimg: the node image object
3013     @param base: the 'template' node we match against (e.g. from the master)
3014
3015     """
3016     node = ninfo.name
3017     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018
3019     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020
3021     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3022     for os_name, os_data in nimg.oslist.items():
3023       assert os_data, "Empty OS status for OS %s?!" % os_name
3024       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3025       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3026                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3027       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3028                "OS '%s' has multiple entries (first one shadows the rest): %s",
3029                os_name, utils.CommaJoin([v[0] for v in os_data]))
3030       # comparisons with the 'base' image
3031       test = os_name not in base.oslist
3032       _ErrorIf(test, constants.CV_ENODEOS, node,
3033                "Extra OS %s not present on reference node (%s)",
3034                os_name, base.name)
3035       if test:
3036         continue
3037       assert base.oslist[os_name], "Base node has empty OS status?"
3038       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039       if not b_status:
3040         # base OS is invalid, skipping
3041         continue
3042       for kind, a, b in [("API version", f_api, b_api),
3043                          ("variants list", f_var, b_var),
3044                          ("parameters", beautify_params(f_param),
3045                           beautify_params(b_param))]:
3046         _ErrorIf(a != b, constants.CV_ENODEOS, node,
3047                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3048                  kind, os_name, base.name,
3049                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050
3051     # check any missing OSes
3052     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3053     _ErrorIf(missing, constants.CV_ENODEOS, node,
3054              "OSes present on reference node %s but missing on this node: %s",
3055              base.name, utils.CommaJoin(missing))
3056
3057   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3058     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059
3060     @type ninfo: L{objects.Node}
3061     @param ninfo: the node to check
3062     @param nresult: the remote results for the node
3063     @type is_master: bool
3064     @param is_master: Whether node is the master node
3065
3066     """
3067     node = ninfo.name
3068
3069     if (is_master and
3070         (constants.ENABLE_FILE_STORAGE or
3071          constants.ENABLE_SHARED_FILE_STORAGE)):
3072       try:
3073         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074       except KeyError:
3075         # This should never happen
3076         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3077                       "Node did not return forbidden file storage paths")
3078       else:
3079         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3080                       "Found forbidden file storage paths: %s",
3081                       utils.CommaJoin(fspaths))
3082     else:
3083       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3084                     constants.CV_ENODEFILESTORAGEPATHS, node,
3085                     "Node should not have returned forbidden file storage"
3086                     " paths")
3087
3088   def _VerifyOob(self, ninfo, nresult):
3089     """Verifies out of band functionality of a node.
3090
3091     @type ninfo: L{objects.Node}
3092     @param ninfo: the node to check
3093     @param nresult: the remote results for the node
3094
3095     """
3096     node = ninfo.name
3097     # We just have to verify the paths on master and/or master candidates
3098     # as the oob helper is invoked on the master
3099     if ((ninfo.master_candidate or ninfo.master_capable) and
3100         constants.NV_OOB_PATHS in nresult):
3101       for path_result in nresult[constants.NV_OOB_PATHS]:
3102         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3103
3104   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3105     """Verifies and updates the node volume data.
3106
3107     This function will update a L{NodeImage}'s internal structures
3108     with data from the remote call.
3109
3110     @type ninfo: L{objects.Node}
3111     @param ninfo: the node to check
3112     @param nresult: the remote results for the node
3113     @param nimg: the node image object
3114     @param vg_name: the configured VG name
3115
3116     """
3117     node = ninfo.name
3118     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3119
3120     nimg.lvm_fail = True
3121     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3122     if vg_name is None:
3123       pass
3124     elif isinstance(lvdata, basestring):
3125       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3126                utils.SafeEncode(lvdata))
3127     elif not isinstance(lvdata, dict):
3128       _ErrorIf(True, constants.CV_ENODELVM, node,
3129                "rpc call to node failed (lvlist)")
3130     else:
3131       nimg.volumes = lvdata
3132       nimg.lvm_fail = False
3133
3134   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3135     """Verifies and updates the node instance list.
3136
3137     If the listing was successful, then updates this node's instance
3138     list. Otherwise, it marks the RPC call as failed for the instance
3139     list key.
3140
3141     @type ninfo: L{objects.Node}
3142     @param ninfo: the node to check
3143     @param nresult: the remote results for the node
3144     @param nimg: the node image object
3145
3146     """
3147     idata = nresult.get(constants.NV_INSTANCELIST, None)
3148     test = not isinstance(idata, list)
3149     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3150                   "rpc call to node failed (instancelist): %s",
3151                   utils.SafeEncode(str(idata)))
3152     if test:
3153       nimg.hyp_fail = True
3154     else:
3155       nimg.instances = idata
3156
3157   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3158     """Verifies and computes a node information map
3159
3160     @type ninfo: L{objects.Node}
3161     @param ninfo: the node to check
3162     @param nresult: the remote results for the node
3163     @param nimg: the node image object
3164     @param vg_name: the configured VG name
3165
3166     """
3167     node = ninfo.name
3168     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3169
3170     # try to read free memory (from the hypervisor)
3171     hv_info = nresult.get(constants.NV_HVINFO, None)
3172     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3173     _ErrorIf(test, constants.CV_ENODEHV, node,
3174              "rpc call to node failed (hvinfo)")
3175     if not test:
3176       try:
3177         nimg.mfree = int(hv_info["memory_free"])
3178       except (ValueError, TypeError):
3179         _ErrorIf(True, constants.CV_ENODERPC, node,
3180                  "node returned invalid nodeinfo, check hypervisor")
3181
3182     # FIXME: devise a free space model for file based instances as well
3183     if vg_name is not None:
3184       test = (constants.NV_VGLIST not in nresult or
3185               vg_name not in nresult[constants.NV_VGLIST])
3186       _ErrorIf(test, constants.CV_ENODELVM, node,
3187                "node didn't return data for the volume group '%s'"
3188                " - it is either missing or broken", vg_name)
3189       if not test:
3190         try:
3191           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3192         except (ValueError, TypeError):
3193           _ErrorIf(True, constants.CV_ENODERPC, node,
3194                    "node returned invalid LVM info, check LVM status")
3195
3196   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3197     """Gets per-disk status information for all instances.
3198
3199     @type nodelist: list of strings
3200     @param nodelist: Node names
3201     @type node_image: dict of (name, L{objects.Node})
3202     @param node_image: Node objects
3203     @type instanceinfo: dict of (name, L{objects.Instance})
3204     @param instanceinfo: Instance objects
3205     @rtype: {instance: {node: [(succes, payload)]}}
3206     @return: a dictionary of per-instance dictionaries with nodes as
3207         keys and disk information as values; the disk information is a
3208         list of tuples (success, payload)
3209
3210     """
3211     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3212
3213     node_disks = {}
3214     node_disks_devonly = {}
3215     diskless_instances = set()
3216     diskless = constants.DT_DISKLESS
3217
3218     for nname in nodelist:
3219       node_instances = list(itertools.chain(node_image[nname].pinst,
3220                                             node_image[nname].sinst))
3221       diskless_instances.update(inst for inst in node_instances
3222                                 if instanceinfo[inst].disk_template == diskless)
3223       disks = [(inst, disk)
3224                for inst in node_instances
3225                for disk in instanceinfo[inst].disks]
3226
3227       if not disks:
3228         # No need to collect data
3229         continue
3230
3231       node_disks[nname] = disks
3232
3233       # _AnnotateDiskParams makes already copies of the disks
3234       devonly = []
3235       for (inst, dev) in disks:
3236         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3237         self.cfg.SetDiskID(anno_disk, nname)
3238         devonly.append(anno_disk)
3239
3240       node_disks_devonly[nname] = devonly
3241
3242     assert len(node_disks) == len(node_disks_devonly)
3243
3244     # Collect data from all nodes with disks
3245     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3246                                                           node_disks_devonly)
3247
3248     assert len(result) == len(node_disks)
3249
3250     instdisk = {}
3251
3252     for (nname, nres) in result.items():
3253       disks = node_disks[nname]
3254
3255       if nres.offline:
3256         # No data from this node
3257         data = len(disks) * [(False, "node offline")]
3258       else:
3259         msg = nres.fail_msg
3260         _ErrorIf(msg, constants.CV_ENODERPC, nname,
3261                  "while getting disk information: %s", msg)
3262         if msg:
3263           # No data from this node
3264           data = len(disks) * [(False, msg)]
3265         else:
3266           data = []
3267           for idx, i in enumerate(nres.payload):
3268             if isinstance(i, (tuple, list)) and len(i) == 2:
3269               data.append(i)
3270             else:
3271               logging.warning("Invalid result from node %s, entry %d: %s",
3272                               nname, idx, i)
3273               data.append((False, "Invalid result from the remote node"))
3274
3275       for ((inst, _), status) in zip(disks, data):
3276         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3277
3278     # Add empty entries for diskless instances.
3279     for inst in diskless_instances:
3280       assert inst not in instdisk
3281       instdisk[inst] = {}
3282
3283     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3284                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
3285                       compat.all(isinstance(s, (tuple, list)) and
3286                                  len(s) == 2 for s in statuses)
3287                       for inst, nnames in instdisk.items()
3288                       for nname, statuses in nnames.items())
3289     if __debug__:
3290       instdisk_keys = set(instdisk)
3291       instanceinfo_keys = set(instanceinfo)
3292       assert instdisk_keys == instanceinfo_keys, \
3293         ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3294          (instdisk_keys, instanceinfo_keys))
3295
3296     return instdisk
3297
3298   @staticmethod
3299   def _SshNodeSelector(group_uuid, all_nodes):
3300     """Create endless iterators for all potential SSH check hosts.
3301
3302     """
3303     nodes = [node for node in all_nodes
3304              if (node.group != group_uuid and
3305                  not node.offline)]
3306     keyfunc = operator.attrgetter("group")
3307
3308     return map(itertools.cycle,
3309                [sorted(map(operator.attrgetter("name"), names))
3310                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3311                                                   keyfunc)])
3312
3313   @classmethod
3314   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3315     """Choose which nodes should talk to which other nodes.
3316
3317     We will make nodes contact all nodes in their group, and one node from
3318     every other group.
3319
3320     @warning: This algorithm has a known issue if one node group is much
3321       smaller than others (e.g. just one node). In such a case all other
3322       nodes will talk to the single node.
3323
3324     """
3325     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3326     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3327
3328     return (online_nodes,
3329             dict((name, sorted([i.next() for i in sel]))
3330                  for name in online_nodes))
3331
3332   def BuildHooksEnv(self):
3333     """Build hooks env.
3334
3335     Cluster-Verify hooks just ran in the post phase and their failure makes
3336     the output be logged in the verify output and the verification to fail.
3337
3338     """
3339     env = {
3340       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3341       }
3342
3343     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3344                for node in self.my_node_info.values())
3345
3346     return env
3347
3348   def BuildHooksNodes(self):
3349     """Build hooks nodes.
3350
3351     """
3352     return ([], self.my_node_names)
3353
3354   def Exec(self, feedback_fn):
3355     """Verify integrity of the node group, performing various test on nodes.
3356
3357     """
3358     # This method has too many local variables. pylint: disable=R0914
3359     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3360
3361     if not self.my_node_names:
3362       # empty node group
3363       feedback_fn("* Empty node group, skipping verification")
3364       return True
3365
3366     self.bad = False
3367     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3368     verbose = self.op.verbose
3369     self._feedback_fn = feedback_fn
3370
3371     vg_name = self.cfg.GetVGName()
3372     drbd_helper = self.cfg.GetDRBDHelper()
3373     cluster = self.cfg.GetClusterInfo()
3374     hypervisors = cluster.enabled_hypervisors
3375     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3376
3377     i_non_redundant = [] # Non redundant instances
3378     i_non_a_balanced = [] # Non auto-balanced instances
3379     i_offline = 0 # Count of offline instances
3380     n_offline = 0 # Count of offline nodes
3381     n_drained = 0 # Count of nodes being drained
3382     node_vol_should = {}
3383
3384     # FIXME: verify OS list
3385
3386     # File verification
3387     filemap = _ComputeAncillaryFiles(cluster, False)
3388
3389     # do local checksums
3390     master_node = self.master_node = self.cfg.GetMasterNode()
3391     master_ip = self.cfg.GetMasterIP()
3392
3393     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3394
3395     user_scripts = []
3396     if self.cfg.GetUseExternalMipScript():
3397       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3398
3399     node_verify_param = {
3400       constants.NV_FILELIST:
3401         map(vcluster.MakeVirtualPath,
3402             utils.UniqueSequence(filename
3403                                  for files in filemap
3404                                  for filename in files)),
3405       constants.NV_NODELIST:
3406         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3407                                   self.all_node_info.values()),
3408       constants.NV_HYPERVISOR: hypervisors,
3409       constants.NV_HVPARAMS:
3410         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3411       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3412                                  for node in node_data_list
3413                                  if not node.offline],
3414       constants.NV_INSTANCELIST: hypervisors,
3415       constants.NV_VERSION: None,
3416       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3417       constants.NV_NODESETUP: None,
3418       constants.NV_TIME: None,
3419       constants.NV_MASTERIP: (master_node, master_ip),
3420       constants.NV_OSLIST: None,
3421       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3422       constants.NV_USERSCRIPTS: user_scripts,
3423       }
3424
3425     if vg_name is not None:
3426       node_verify_param[constants.NV_VGLIST] = None
3427       node_verify_param[constants.NV_LVLIST] = vg_name
3428       node_verify_param[constants.NV_PVLIST] = [vg_name]
3429
3430     if drbd_helper:
3431       node_verify_param[constants.NV_DRBDLIST] = None
3432       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3433
3434     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3435       # Load file storage paths only from master node
3436       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3437
3438     # bridge checks
3439     # FIXME: this needs to be changed per node-group, not cluster-wide
3440     bridges = set()
3441     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3442     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3443       bridges.add(default_nicpp[constants.NIC_LINK])
3444     for instance in self.my_inst_info.values():
3445       for nic in instance.nics:
3446         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3447         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3448           bridges.add(full_nic[constants.NIC_LINK])
3449
3450     if bridges:
3451       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3452
3453     # Build our expected cluster state
3454     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3455                                                  name=node.name,
3456                                                  vm_capable=node.vm_capable))
3457                       for node in node_data_list)
3458
3459     # Gather OOB paths
3460     oob_paths = []
3461     for node in self.all_node_info.values():
3462       path = _SupportsOob(self.cfg, node)
3463       if path and path not in oob_paths:
3464         oob_paths.append(path)
3465
3466     if oob_paths:
3467       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3468
3469     for instance in self.my_inst_names:
3470       inst_config = self.my_inst_info[instance]
3471       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3472         i_offline += 1
3473
3474       for nname in inst_config.all_nodes:
3475         if nname not in node_image:
3476           gnode = self.NodeImage(name=nname)
3477           gnode.ghost = (nname not in self.all_node_info)
3478           node_image[nname] = gnode
3479
3480       inst_config.MapLVsByNode(node_vol_should)
3481
3482       pnode = inst_config.primary_node
3483       node_image[pnode].pinst.append(instance)
3484
3485       for snode in inst_config.secondary_nodes:
3486         nimg = node_image[snode]
3487         nimg.sinst.append(instance)
3488         if pnode not in nimg.sbp:
3489           nimg.sbp[pnode] = []
3490         nimg.sbp[pnode].append(instance)
3491
3492     es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3493     es_unset_nodes = []
3494     # The value of exclusive_storage should be the same across the group, so if
3495     # it's True for at least a node, we act as if it were set for all the nodes
3496     self._exclusive_storage = compat.any(es_flags.values())
3497     if self._exclusive_storage:
3498       node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3499       es_unset_nodes = [n for (n, es) in es_flags.items()
3500                         if not es]
3501
3502     if es_unset_nodes:
3503       self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3504                   "The exclusive_storage flag should be uniform in a group,"
3505                   " but these nodes have it unset: %s",
3506                   utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3507       self.LogWarning("Some checks required by exclusive storage will be"
3508                       " performed also on nodes with the flag unset")
3509
3510     # At this point, we have the in-memory data structures complete,
3511     # except for the runtime information, which we'll gather next
3512
3513     # Due to the way our RPC system works, exact response times cannot be
3514     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3515     # time before and after executing the request, we can at least have a time
3516     # window.
3517     nvinfo_starttime = time.time()
3518     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3519                                            node_verify_param,
3520                                            self.cfg.GetClusterName())
3521     nvinfo_endtime = time.time()
3522
3523     if self.extra_lv_nodes and vg_name is not None:
3524       extra_lv_nvinfo = \
3525           self.rpc.call_node_verify(self.extra_lv_nodes,
3526                                     {constants.NV_LVLIST: vg_name},
3527                                     self.cfg.GetClusterName())
3528     else:
3529       extra_lv_nvinfo = {}
3530
3531     all_drbd_map = self.cfg.ComputeDRBDMap()
3532
3533     feedback_fn("* Gathering disk information (%s nodes)" %
3534                 len(self.my_node_names))
3535     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3536                                      self.my_inst_info)
3537
3538     feedback_fn("* Verifying configuration file consistency")
3539
3540     # If not all nodes are being checked, we need to make sure the master node
3541     # and a non-checked vm_capable node are in the list.
3542     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3543     if absent_nodes:
3544       vf_nvinfo = all_nvinfo.copy()
3545       vf_node_info = list(self.my_node_info.values())
3546       additional_nodes = []
3547       if master_node not in self.my_node_info:
3548         additional_nodes.append(master_node)
3549         vf_node_info.append(self.all_node_info[master_node])
3550       # Add the first vm_capable node we find which is not included,
3551       # excluding the master node (which we already have)
3552       for node in absent_nodes:
3553         nodeinfo = self.all_node_info[node]
3554         if (nodeinfo.vm_capable and not nodeinfo.offline and
3555             node != master_node):
3556           additional_nodes.append(node)
3557           vf_node_info.append(self.all_node_info[node])
3558           break
3559       key = constants.NV_FILELIST
3560       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3561                                                  {key: node_verify_param[key]},
3562                                                  self.cfg.GetClusterName()))
3563     else:
3564       vf_nvinfo = all_nvinfo
3565       vf_node_info = self.my_node_info.values()
3566
3567     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3568
3569     feedback_fn("* Verifying node status")
3570
3571     refos_img = None
3572
3573     for node_i in node_data_list:
3574       node = node_i.name
3575       nimg = node_image[node]
3576
3577       if node_i.offline:
3578         if verbose:
3579           feedback_fn("* Skipping offline node %s" % (node,))
3580         n_offline += 1
3581         continue
3582
3583       if node == master_node:
3584         ntype = "master"
3585       elif node_i.master_candidate:
3586         ntype = "master candidate"
3587       elif node_i.drained:
3588         ntype = "drained"
3589         n_drained += 1
3590       else:
3591         ntype = "regular"
3592       if verbose:
3593         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3594
3595       msg = all_nvinfo[node].fail_msg
3596       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3597                msg)
3598       if msg:
3599         nimg.rpc_fail = True
3600         continue
3601
3602       nresult = all_nvinfo[node].payload
3603
3604       nimg.call_ok = self._VerifyNode(node_i, nresult)
3605       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3606       self._VerifyNodeNetwork(node_i, nresult)
3607       self._VerifyNodeUserScripts(node_i, nresult)
3608       self._VerifyOob(node_i, nresult)
3609       self._VerifyFileStoragePaths(node_i, nresult,
3610                                    node == master_node)
3611
3612       if nimg.vm_capable:
3613         self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3614         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3615                              all_drbd_map)
3616
3617         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3618         self._UpdateNodeInstances(node_i, nresult, nimg)
3619         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3620         self._UpdateNodeOS(node_i, nresult, nimg)
3621
3622         if not nimg.os_fail:
3623           if refos_img is None:
3624             refos_img = nimg
3625           self._VerifyNodeOS(node_i, nimg, refos_img)
3626         self._VerifyNodeBridges(node_i, nresult, bridges)
3627
3628         # Check whether all running instancies are primary for the node. (This
3629         # can no longer be done from _VerifyInstance below, since some of the
3630         # wrong instances could be from other node groups.)
3631         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3632
3633         for inst in non_primary_inst:
3634           test = inst in self.all_inst_info
3635           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3636                    "instance should not run on node %s", node_i.name)
3637           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3638                    "node is running unknown instance %s", inst)
3639
3640     self._VerifyGroupLVM(node_image, vg_name)
3641
3642     for node, result in extra_lv_nvinfo.items():
3643       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3644                               node_image[node], vg_name)
3645
3646     feedback_fn("* Verifying instance status")
3647     for instance in self.my_inst_names:
3648       if verbose:
3649         feedback_fn("* Verifying instance %s" % instance)
3650       inst_config = self.my_inst_info[instance]
3651       self._VerifyInstance(instance, inst_config, node_image,
3652                            instdisk[instance])
3653
3654       # If the instance is non-redundant we cannot survive losing its primary
3655       # node, so we are not N+1 compliant.
3656       if inst_config.disk_template not in constants.DTS_MIRRORED:
3657         i_non_redundant.append(instance)
3658
3659       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3660         i_non_a_balanced.append(instance)
3661
3662     feedback_fn("* Verifying orphan volumes")
3663     reserved = utils.FieldSet(*cluster.reserved_lvs)
3664
3665     # We will get spurious "unknown volume" warnings if any node of this group
3666     # is secondary for an instance whose primary is in another group. To avoid
3667     # them, we find these instances and add their volumes to node_vol_should.
3668     for inst in self.all_inst_info.values():
3669       for secondary in inst.secondary_nodes:
3670         if (secondary in self.my_node_info
3671             and inst.name not in self.my_inst_info):
3672           inst.MapLVsByNode(node_vol_should)
3673           break
3674
3675     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3676
3677     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3678       feedback_fn("* Verifying N+1 Memory redundancy")
3679       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3680
3681     feedback_fn("* Other Notes")
3682     if i_non_redundant:
3683       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3684                   % len(i_non_redundant))
3685
3686     if i_non_a_balanced:
3687       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3688                   % len(i_non_a_balanced))
3689
3690     if i_offline:
3691       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3692
3693     if n_offline:
3694       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3695
3696     if n_drained:
3697       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3698
3699     return not self.bad
3700
3701   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3702     """Analyze the post-hooks' result
3703
3704     This method analyses the hook result, handles it, and sends some
3705     nicely-formatted feedback back to the user.
3706
3707     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3708         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3709     @param hooks_results: the results of the multi-node hooks rpc call
3710     @param feedback_fn: function used send feedback back to the caller
3711     @param lu_result: previous Exec result
3712     @return: the new Exec result, based on the previous result
3713         and hook results
3714
3715     """
3716     # We only really run POST phase hooks, only for non-empty groups,
3717     # and are only interested in their results
3718     if not self.my_node_names:
3719       # empty node group
3720       pass
3721     elif phase == constants.HOOKS_PHASE_POST:
3722       # Used to change hooks' output to proper indentation
3723       feedback_fn("* Hooks Results")
3724       assert hooks_results, "invalid result from hooks"
3725
3726       for node_name in hooks_results:
3727         res = hooks_results[node_name]
3728         msg = res.fail_msg
3729         test = msg and not res.offline
3730         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3731                       "Communication failure in hooks execution: %s", msg)
3732         if res.offline or msg:
3733           # No need to investigate payload if node is offline or gave
3734           # an error.
3735           continue
3736         for script, hkr, output in res.payload:
3737           test = hkr == constants.HKR_FAIL
3738           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3739                         "Script %s failed, output:", script)
3740           if test:
3741             output = self._HOOKS_INDENT_RE.sub("      ", output)
3742             feedback_fn("%s" % output)
3743             lu_result = False
3744
3745     return lu_result
3746
3747
3748 class LUClusterVerifyDisks(NoHooksLU):
3749   """Verifies the cluster disks status.
3750
3751   """
3752   REQ_BGL = False
3753
3754   def ExpandNames(self):
3755     self.share_locks = _ShareAll()
3756     self.needed_locks = {
3757       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3758       }
3759
3760   def Exec(self, feedback_fn):
3761     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3762
3763     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3764     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3765                            for group in group_names])
3766
3767
3768 class LUGroupVerifyDisks(NoHooksLU):
3769   """Verifies the status of all disks in a node group.
3770
3771   """
3772   REQ_BGL = False
3773
3774   def ExpandNames(self):
3775     # Raises errors.OpPrereqError on its own if group can't be found
3776     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3777
3778     self.share_locks = _ShareAll()
3779     self.needed_locks = {
3780       locking.LEVEL_INSTANCE: [],
3781       locking.LEVEL_NODEGROUP: [],
3782       locking.LEVEL_NODE: [],
3783
3784       # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3785       # starts one instance of this opcode for every group, which means all
3786       # nodes will be locked for a short amount of time, so it's better to
3787       # acquire the node allocation lock as well.
3788       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3789       }
3790
3791   def DeclareLocks(self, level):
3792     if level == locking.LEVEL_INSTANCE:
3793       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3794
3795       # Lock instances optimistically, needs verification once node and group
3796       # locks have been acquired
3797       self.needed_locks[locking.LEVEL_INSTANCE] = \
3798         self.cfg.GetNodeGroupInstances(self.group_uuid)
3799
3800     elif level == locking.LEVEL_NODEGROUP:
3801       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3802
3803       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3804         set([self.group_uuid] +
3805             # Lock all groups used by instances optimistically; this requires
3806             # going via the node before it's locked, requiring verification
3807             # later on
3808             [group_uuid
3809              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3810              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3811
3812     elif level == locking.LEVEL_NODE:
3813       # This will only lock the nodes in the group to be verified which contain
3814       # actual instances
3815       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3816       self._LockInstancesNodes()
3817
3818       # Lock all nodes in group to be verified
3819       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3820       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3821       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3822
3823   def CheckPrereq(self):
3824     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3825     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3826     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3827
3828     assert self.group_uuid in owned_groups
3829
3830     # Check if locked instances are still correct
3831     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3832
3833     # Get instance information
3834     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3835
3836     # Check if node groups for locked instances are still correct
3837     _CheckInstancesNodeGroups(self.cfg, self.instances,
3838                               owned_groups, owned_nodes, self.group_uuid)
3839
3840   def Exec(self, feedback_fn):
3841     """Verify integrity of cluster disks.
3842
3843     @rtype: tuple of three items
3844     @return: a tuple of (dict of node-to-node_error, list of instances
3845         which need activate-disks, dict of instance: (node, volume) for
3846         missing volumes
3847
3848     """
3849     res_nodes = {}
3850     res_instances = set()
3851     res_missing = {}
3852
3853     nv_dict = _MapInstanceDisksToNodes(
3854       [inst for inst in self.instances.values()
3855        if inst.admin_state == constants.ADMINST_UP])
3856
3857     if nv_dict:
3858       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3859                              set(self.cfg.GetVmCapableNodeList()))
3860
3861       node_lvs = self.rpc.call_lv_list(nodes, [])
3862
3863       for (node, node_res) in node_lvs.items():
3864         if node_res.offline:
3865           continue
3866
3867         msg = node_res.fail_msg
3868         if msg:
3869           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3870           res_nodes[node] = msg
3871           continue
3872
3873         for lv_name, (_, _, lv_online) in node_res.payload.items():
3874           inst = nv_dict.pop((node, lv_name), None)
3875           if not (lv_online or inst is None):
3876             res_instances.add(inst)
3877
3878       # any leftover items in nv_dict are missing LVs, let's arrange the data
3879       # better
3880       for key, inst in nv_dict.iteritems():
3881         res_missing.setdefault(inst, []).append(list(key))
3882
3883     return (res_nodes, list(res_instances), res_missing)
3884
3885
3886 class LUClusterRepairDiskSizes(NoHooksLU):
3887   """Verifies the cluster disks sizes.
3888
3889   """
3890   REQ_BGL = False
3891
3892   def ExpandNames(self):
3893     if self.op.instances:
3894       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3895       # Not getting the node allocation lock as only a specific set of
3896       # instances (and their nodes) is going to be acquired
3897       self.needed_locks = {
3898         locking.LEVEL_NODE_RES: [],
3899         locking.LEVEL_INSTANCE: self.wanted_names,
3900         }
3901       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3902     else:
3903       self.wanted_names = None
3904       self.needed_locks = {
3905         locking.LEVEL_NODE_RES: locking.ALL_SET,
3906         locking.LEVEL_INSTANCE: locking.ALL_SET,
3907
3908         # This opcode is acquires the node locks for all instances
3909         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3910         }
3911
3912     self.share_locks = {
3913       locking.LEVEL_NODE_RES: 1,
3914       locking.LEVEL_INSTANCE: 0,
3915       locking.LEVEL_NODE_ALLOC: 1,
3916       }
3917
3918   def DeclareLocks(self, level):
3919     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3920       self._LockInstancesNodes(primary_only=True, level=level)
3921
3922   def CheckPrereq(self):
3923     """Check prerequisites.
3924
3925     This only checks the optional instance list against the existing names.
3926
3927     """
3928     if self.wanted_names is None:
3929       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3930
3931     self.wanted_instances = \
3932         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3933
3934   def _EnsureChildSizes(self, disk):
3935     """Ensure children of the disk have the needed disk size.
3936
3937     This is valid mainly for DRBD8 and fixes an issue where the
3938     children have smaller disk size.
3939
3940     @param disk: an L{ganeti.objects.Disk} object
3941
3942     """
3943     if disk.dev_type == constants.LD_DRBD8:
3944       assert disk.children, "Empty children for DRBD8?"
3945       fchild = disk.children[0]
3946       mismatch = fchild.size < disk.size
3947       if mismatch:
3948         self.LogInfo("Child disk has size %d, parent %d, fixing",
3949                      fchild.size, disk.size)
3950         fchild.size = disk.size
3951
3952       # and we recurse on this child only, not on the metadev
3953       return self._EnsureChildSizes(fchild) or mismatch
3954     else:
3955       return False
3956
3957   def Exec(self, feedback_fn):
3958     """Verify the size of cluster disks.
3959
3960     """
3961     # TODO: check child disks too
3962     # TODO: check differences in size between primary/secondary nodes
3963     per_node_disks = {}
3964     for instance in self.wanted_instances:
3965       pnode = instance.primary_node
3966       if pnode not in per_node_disks:
3967         per_node_disks[pnode] = []
3968       for idx, disk in enumerate(instance.disks):
3969         per_node_disks[pnode].append((instance, idx, disk))
3970
3971     assert not (frozenset(per_node_disks.keys()) -
3972                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3973       "Not owning correct locks"
3974     assert not self.owned_locks(locking.LEVEL_NODE)
3975
3976     changed = []
3977     for node, dskl in per_node_disks.items():
3978       newl = [v[2].Copy() for v in dskl]
3979       for dsk in newl:
3980         self.cfg.SetDiskID(dsk, node)
3981       result = self.rpc.call_blockdev_getsize(node, newl)
3982       if result.fail_msg:
3983         self.LogWarning("Failure in blockdev_getsize call to node"
3984                         " %s, ignoring", node)
3985         continue
3986       if len(result.payload) != len(dskl):
3987         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3988                         " result.payload=%s", node, len(dskl), result.payload)
3989         self.LogWarning("Invalid result from node %s, ignoring node results",
3990                         node)
3991         continue
3992       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3993         if size is None:
3994           self.LogWarning("Disk %d of instance %s did not return size"
3995                           " information, ignoring", idx, instance.name)
3996           continue
3997         if not isinstance(size, (int, long)):
3998           self.LogWarning("Disk %d of instance %s did not return valid"
3999                           " size information, ignoring", idx, instance.name)
4000           continue
4001         size = size >> 20
4002         if size != disk.size:
4003           self.LogInfo("Disk %d of instance %s has mismatched size,"
4004                        " correcting: recorded %d, actual %d", idx,
4005                        instance.name, disk.size, size)
4006           disk.size = size
4007           self.cfg.Update(instance, feedback_fn)
4008           changed.append((instance.name, idx, size))
4009         if self._EnsureChildSizes(disk):
4010           self.cfg.Update(instance, feedback_fn)
4011           changed.append((instance.name, idx, disk.size))
4012     return changed
4013
4014
4015 class LUClusterRename(LogicalUnit):
4016   """Rename the cluster.
4017
4018   """
4019   HPATH = "cluster-rename"
4020   HTYPE = constants.HTYPE_CLUSTER
4021
4022   def BuildHooksEnv(self):
4023     """Build hooks env.
4024
4025     """
4026     return {
4027       "OP_TARGET": self.cfg.GetClusterName(),
4028       "NEW_NAME": self.op.name,
4029       }
4030
4031   def BuildHooksNodes(self):
4032     """Build hooks nodes.
4033
4034     """
4035     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4036
4037   def CheckPrereq(self):
4038     """Verify that the passed name is a valid one.
4039
4040     """
4041     hostname = netutils.GetHostname(name=self.op.name,
4042                                     family=self.cfg.GetPrimaryIPFamily())
4043
4044     new_name = hostname.name
4045     self.ip = new_ip = hostname.ip
4046     old_name = self.cfg.GetClusterName()
4047     old_ip = self.cfg.GetMasterIP()
4048     if new_name == old_name and new_ip == old_ip:
4049       raise errors.OpPrereqError("Neither the name nor the IP address of the"
4050                                  " cluster has changed",
4051                                  errors.ECODE_INVAL)
4052     if new_ip != old_ip:
4053       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4054         raise errors.OpPrereqError("The given cluster IP address (%s) is"
4055                                    " reachable on the network" %
4056                                    new_ip, errors.ECODE_NOTUNIQUE)
4057
4058     self.op.name = new_name
4059
4060   def Exec(self, feedback_fn):
4061     """Rename the cluster.
4062
4063     """
4064     clustername = self.op.name
4065     new_ip = self.ip
4066
4067     # shutdown the master IP
4068     master_params = self.cfg.GetMasterNetworkParameters()
4069     ems = self.cfg.GetUseExternalMipScript()
4070     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4071                                                      master_params, ems)
4072     result.Raise("Could not disable the master role")
4073
4074     try:
4075       cluster = self.cfg.GetClusterInfo()
4076       cluster.cluster_name = clustername
4077       cluster.master_ip = new_ip
4078       self.cfg.Update(cluster, feedback_fn)
4079
4080       # update the known hosts file
4081       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4082       node_list = self.cfg.GetOnlineNodeList()
4083       try:
4084         node_list.remove(master_params.name)
4085       except ValueError:
4086         pass
4087       _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4088     finally:
4089       master_params.ip = new_ip
4090       result = self.rpc.call_node_activate_master_ip(master_params.name,
4091                                                      master_params, ems)
4092       msg = result.fail_msg
4093       if msg:
4094         self.LogWarning("Could not re-enable the master role on"
4095                         " the master, please restart manually: %s", msg)
4096
4097     return clustername
4098
4099
4100 def _ValidateNetmask(cfg, netmask):
4101   """Checks if a netmask is valid.
4102
4103   @type cfg: L{config.ConfigWriter}
4104   @param cfg: The cluster configuration
4105   @type netmask: int
4106   @param netmask: the netmask to be verified
4107   @raise errors.OpPrereqError: if the validation fails
4108
4109   """
4110   ip_family = cfg.GetPrimaryIPFamily()
4111   try:
4112     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4113   except errors.ProgrammerError:
4114     raise errors.OpPrereqError("Invalid primary ip family: %s." %
4115                                ip_family, errors.ECODE_INVAL)
4116   if not ipcls.ValidateNetmask(netmask):
4117     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4118                                 (netmask), errors.ECODE_INVAL)
4119
4120
4121 class LUClusterSetParams(LogicalUnit):
4122   """Change the parameters of the cluster.
4123
4124   """
4125   HPATH = "cluster-modify"
4126   HTYPE = constants.HTYPE_CLUSTER
4127   REQ_BGL = False
4128
4129   def CheckArguments(self):
4130     """Check parameters
4131
4132     """
4133     if self.op.uid_pool:
4134       uidpool.CheckUidPool(self.op.uid_pool)
4135
4136     if self.op.add_uids:
4137       uidpool.CheckUidPool(self.op.add_uids)
4138
4139     if self.op.remove_uids:
4140       uidpool.CheckUidPool(self.op.remove_uids)
4141
4142     if self.op.master_netmask is not None:
4143       _ValidateNetmask(self.cfg, self.op.master_netmask)
4144
4145     if self.op.diskparams:
4146       for dt_params in self.op.diskparams.values():
4147         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4148       try:
4149         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4150       except errors.OpPrereqError, err:
4151         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4152                                    errors.ECODE_INVAL)
4153
4154   def ExpandNames(self):
4155     # FIXME: in the future maybe other cluster params won't require checking on
4156     # all nodes to be modified.
4157     # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4158     # resource locks the right thing, shouldn't it be the BGL instead?
4159     self.needed_locks = {
4160       locking.LEVEL_NODE: locking.ALL_SET,
4161       locking.LEVEL_INSTANCE: locking.ALL_SET,
4162       locking.LEVEL_NODEGROUP: locking.ALL_SET,
4163       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4164     }
4165     self.share_locks = _ShareAll()
4166
4167   def BuildHooksEnv(self):
4168     """Build hooks env.
4169
4170     """
4171     return {
4172       "OP_TARGET": self.cfg.GetClusterName(),
4173       "NEW_VG_NAME": self.op.vg_name,
4174       }
4175
4176   def BuildHooksNodes(self):
4177     """Build hooks nodes.
4178
4179     """
4180     mn = self.cfg.GetMasterNode()
4181     return ([mn], [mn])
4182
4183   def CheckPrereq(self):
4184     """Check prerequisites.
4185
4186     This checks whether the given params don't conflict and
4187     if the given volume group is valid.
4188
4189     """
4190     if self.op.vg_name is not None and not self.op.vg_name:
4191       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4192         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4193                                    " instances exist", errors.ECODE_INVAL)
4194
4195     if self.op.drbd_helper is not None and not self.op.drbd_helper:
4196       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4197         raise errors.OpPrereqError("Cannot disable drbd helper while"
4198                                    " drbd-based instances exist",
4199                                    errors.ECODE_INVAL)
4200
4201     node_list = self.owned_locks(locking.LEVEL_NODE)
4202
4203     # if vg_name not None, checks given volume group on all nodes
4204     if self.op.vg_name:
4205       vglist = self.rpc.call_vg_list(node_list)
4206       for node in node_list:
4207         msg = vglist[node].fail_msg
4208         if msg:
4209           # ignoring down node
4210           self.LogWarning("Error while gathering data on node %s"
4211                           " (ignoring node): %s", node, msg)
4212           continue
4213         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4214                                               self.op.vg_name,
4215                                               constants.MIN_VG_SIZE)
4216         if vgstatus:
4217           raise errors.OpPrereqError("Error on node '%s': %s" %
4218                                      (node, vgstatus), errors.ECODE_ENVIRON)
4219
4220     if self.op.drbd_helper:
4221       # checks given drbd helper on all nodes
4222       helpers = self.rpc.call_drbd_helper(node_list)
4223       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4224         if ninfo.offline:
4225           self.LogInfo("Not checking drbd helper on offline node %s", node)
4226           continue
4227         msg = helpers[node].fail_msg
4228         if msg:
4229           raise errors.OpPrereqError("Error checking drbd helper on node"
4230                                      " '%s': %s" % (node, msg),
4231                                      errors.ECODE_ENVIRON)
4232         node_helper = helpers[node].payload
4233         if node_helper != self.op.drbd_helper:
4234           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4235                                      (node, node_helper), errors.ECODE_ENVIRON)
4236
4237     self.cluster = cluster = self.cfg.GetClusterInfo()
4238     # validate params changes
4239     if self.op.beparams:
4240       objects.UpgradeBeParams(self.op.beparams)
4241       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4242       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4243
4244     if self.op.ndparams:
4245       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4246       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4247
4248       # TODO: we need a more general way to handle resetting
4249       # cluster-level parameters to default values
4250       if self.new_ndparams["oob_program"] == "":
4251         self.new_ndparams["oob_program"] = \
4252             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4253
4254     if self.op.hv_state:
4255       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4256                                             self.cluster.hv_state_static)
4257       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4258                                for hv, values in new_hv_state.items())
4259
4260     if self.op.disk_state:
4261       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4262                                                 self.cluster.disk_state_static)
4263       self.new_disk_state = \
4264         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4265                             for name, values in svalues.items()))
4266              for storage, svalues in new_disk_state.items())
4267
4268     if self.op.ipolicy:
4269       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4270                                             group_policy=False)
4271
4272       all_instances = self.cfg.GetAllInstancesInfo().values()
4273       violations = set()
4274       for group in self.cfg.GetAllNodeGroupsInfo().values():
4275         instances = frozenset([inst for inst in all_instances
4276                                if compat.any(node in group.members
4277                                              for node in inst.all_nodes)])
4278         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4279         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4280         new = _ComputeNewInstanceViolations(ipol,
4281                                             new_ipolicy, instances)
4282         if new:
4283           violations.update(new)
4284
4285       if violations:
4286         self.LogWarning("After the ipolicy change the following instances"
4287                         " violate them: %s",
4288                         utils.CommaJoin(utils.NiceSort(violations)))
4289
4290     if self.op.nicparams:
4291       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4292       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4293       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4294       nic_errors = []
4295
4296       # check all instances for consistency
4297       for instance in self.cfg.GetAllInstancesInfo().values():
4298         for nic_idx, nic in enumerate(instance.nics):
4299           params_copy = copy.deepcopy(nic.nicparams)
4300           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4301
4302           # check parameter syntax
4303           try:
4304             objects.NIC.CheckParameterSyntax(params_filled)
4305           except errors.ConfigurationError, err:
4306             nic_errors.append("Instance %s, nic/%d: %s" %
4307                               (instance.name, nic_idx, err))
4308
4309           # if we're moving instances to routed, check that they have an ip
4310           target_mode = params_filled[constants.NIC_MODE]
4311           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4312             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4313                               " address" % (instance.name, nic_idx))
4314       if nic_errors:
4315         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4316                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4317
4318     # hypervisor list/parameters
4319     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4320     if self.op.hvparams:
4321       for hv_name, hv_dict in self.op.hvparams.items():
4322         if hv_name not in self.new_hvparams:
4323           self.new_hvparams[hv_name] = hv_dict
4324         else:
4325           self.new_hvparams[hv_name].update(hv_dict)
4326
4327     # disk template parameters
4328     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4329     if self.op.diskparams:
4330       for dt_name, dt_params in self.op.diskparams.items():
4331         if dt_name not in self.op.diskparams:
4332           self.new_diskparams[dt_name] = dt_params
4333         else:
4334           self.new_diskparams[dt_name].update(dt_params)
4335
4336     # os hypervisor parameters
4337     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4338     if self.op.os_hvp:
4339       for os_name, hvs in self.op.os_hvp.items():
4340         if os_name not in self.new_os_hvp:
4341           self.new_os_hvp[os_name] = hvs
4342         else:
4343           for hv_name, hv_dict in hvs.items():
4344             if hv_dict is None:
4345               # Delete if it exists
4346               self.new_os_hvp[os_name].pop(hv_name, None)
4347             elif hv_name not in self.new_os_hvp[os_name]:
4348               self.new_os_hvp[os_name][hv_name] = hv_dict
4349             else:
4350               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4351
4352     # os parameters
4353     self.new_osp = objects.FillDict(cluster.osparams, {})
4354     if self.op.osparams:
4355       for os_name, osp in self.op.osparams.items():
4356         if os_name not in self.new_osp:
4357           self.new_osp[os_name] = {}
4358
4359         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4360                                                   use_none=True)
4361
4362         if not self.new_osp[os_name]:
4363           # we removed all parameters
4364           del self.new_osp[os_name]
4365         else:
4366           # check the parameter validity (remote check)
4367           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4368                          os_name, self.new_osp[os_name])
4369
4370     # changes to the hypervisor list
4371     if self.op.enabled_hypervisors is not None:
4372       self.hv_list = self.op.enabled_hypervisors
4373       for hv in self.hv_list:
4374         # if the hypervisor doesn't already exist in the cluster
4375         # hvparams, we initialize it to empty, and then (in both
4376         # cases) we make sure to fill the defaults, as we might not
4377         # have a complete defaults list if the hypervisor wasn't
4378         # enabled before
4379         if hv not in new_hvp:
4380           new_hvp[hv] = {}
4381         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4382         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4383     else:
4384       self.hv_list = cluster.enabled_hypervisors
4385
4386     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4387       # either the enabled list has changed, or the parameters have, validate
4388       for hv_name, hv_params in self.new_hvparams.items():
4389         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4390             (self.op.enabled_hypervisors and
4391              hv_name in self.op.enabled_hypervisors)):
4392           # either this is a new hypervisor, or its parameters have changed
4393           hv_class = hypervisor.GetHypervisorClass(hv_name)
4394           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4395           hv_class.CheckParameterSyntax(hv_params)
4396           _CheckHVParams(self, node_list, hv_name, hv_params)
4397
4398     if self.op.os_hvp:
4399       # no need to check any newly-enabled hypervisors, since the
4400       # defaults have already been checked in the above code-block
4401       for os_name, os_hvp in self.new_os_hvp.items():
4402         for hv_name, hv_params in os_hvp.items():
4403           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4404           # we need to fill in the new os_hvp on top of the actual hv_p
4405           cluster_defaults = self.new_hvparams.get(hv_name, {})
4406           new_osp = objects.FillDict(cluster_defaults, hv_params)
4407           hv_class = hypervisor.GetHypervisorClass(hv_name)
4408           hv_class.CheckParameterSyntax(new_osp)
4409           _CheckHVParams(self, node_list, hv_name, new_osp)
4410
4411     if self.op.default_iallocator:
4412       alloc_script = utils.FindFile(self.op.default_iallocator,
4413                                     constants.IALLOCATOR_SEARCH_PATH,
4414                                     os.path.isfile)
4415       if alloc_script is None:
4416         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4417                                    " specified" % self.op.default_iallocator,
4418                                    errors.ECODE_INVAL)
4419
4420   def Exec(self, feedback_fn):
4421     """Change the parameters of the cluster.
4422
4423     """
4424     if self.op.vg_name is not None:
4425       new_volume = self.op.vg_name
4426       if not new_volume:
4427         new_volume = None
4428       if new_volume != self.cfg.GetVGName():
4429         self.cfg.SetVGName(new_volume)
4430       else:
4431         feedback_fn("Cluster LVM configuration already in desired"
4432                     " state, not changing")
4433     if self.op.drbd_helper is not None:
4434       new_helper = self.op.drbd_helper
4435       if not new_helper:
4436         new_helper = None
4437       if new_helper != self.cfg.GetDRBDHelper():
4438         self.cfg.SetDRBDHelper(new_helper)
4439       else:
4440         feedback_fn("Cluster DRBD helper already in desired state,"
4441                     " not changing")
4442     if self.op.hvparams:
4443       self.cluster.hvparams = self.new_hvparams
4444     if self.op.os_hvp:
4445       self.cluster.os_hvp = self.new_os_hvp
4446     if self.op.enabled_hypervisors is not None:
4447       self.cluster.hvparams = self.new_hvparams
4448       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4449     if self.op.beparams:
4450       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4451     if self.op.nicparams:
4452       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4453     if self.op.ipolicy:
4454       self.cluster.ipolicy = self.new_ipolicy
4455     if self.op.osparams:
4456       self.cluster.osparams = self.new_osp
4457     if self.op.ndparams:
4458       self.cluster.ndparams = self.new_ndparams
4459     if self.op.diskparams:
4460       self.cluster.diskparams = self.new_diskparams
4461     if self.op.hv_state:
4462       self.cluster.hv_state_static = self.new_hv_state
4463     if self.op.disk_state:
4464       self.cluster.disk_state_static = self.new_disk_state
4465
4466     if self.op.candidate_pool_size is not None:
4467       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4468       # we need to update the pool size here, otherwise the save will fail
4469       _AdjustCandidatePool(self, [])
4470
4471     if self.op.maintain_node_health is not None:
4472       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4473         feedback_fn("Note: CONFD was disabled at build time, node health"
4474                     " maintenance is not useful (still enabling it)")
4475       self.cluster.maintain_node_health = self.op.maintain_node_health
4476
4477     if self.op.prealloc_wipe_disks is not None:
4478       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4479
4480     if self.op.add_uids is not None:
4481       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4482
4483     if self.op.remove_uids is not None:
4484       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4485
4486     if self.op.uid_pool is not None:
4487       self.cluster.uid_pool = self.op.uid_pool
4488
4489     if self.op.default_iallocator is not None:
4490       self.cluster.default_iallocator = self.op.default_iallocator
4491
4492     if self.op.reserved_lvs is not None:
4493       self.cluster.reserved_lvs = self.op.reserved_lvs
4494
4495     if self.op.use_external_mip_script is not None:
4496       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4497
4498     def helper_os(aname, mods, desc):
4499       desc += " OS list"
4500       lst = getattr(self.cluster, aname)
4501       for key, val in mods:
4502         if key == constants.DDM_ADD:
4503           if val in lst:
4504             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4505           else:
4506             lst.append(val)
4507         elif key == constants.DDM_REMOVE:
4508           if val in lst:
4509             lst.remove(val)
4510           else:
4511             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4512         else:
4513           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4514
4515     if self.op.hidden_os:
4516       helper_os("hidden_os", self.op.hidden_os, "hidden")
4517
4518     if self.op.blacklisted_os:
4519       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4520
4521     if self.op.master_netdev:
4522       master_params = self.cfg.GetMasterNetworkParameters()
4523       ems = self.cfg.GetUseExternalMipScript()
4524       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4525                   self.cluster.master_netdev)
4526       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4527                                                        master_params, ems)
4528       result.Raise("Could not disable the master ip")
4529       feedback_fn("Changing master_netdev from %s to %s" %
4530                   (master_params.netdev, self.op.master_netdev))
4531       self.cluster.master_netdev = self.op.master_netdev
4532
4533     if self.op.master_netmask:
4534       master_params = self.cfg.GetMasterNetworkParameters()
4535       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4536       result = self.rpc.call_node_change_master_netmask(master_params.name,
4537                                                         master_params.netmask,
4538                                                         self.op.master_netmask,
4539                                                         master_params.ip,
4540                                                         master_params.netdev)
4541       if result.fail_msg:
4542         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4543         feedback_fn(msg)
4544
4545       self.cluster.master_netmask = self.op.master_netmask
4546
4547     self.cfg.Update(self.cluster, feedback_fn)
4548
4549     if self.op.master_netdev:
4550       master_params = self.cfg.GetMasterNetworkParameters()
4551       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4552                   self.op.master_netdev)
4553       ems = self.cfg.GetUseExternalMipScript()
4554       result = self.rpc.call_node_activate_master_ip(master_params.name,
4555                                                      master_params, ems)
4556       if result.fail_msg:
4557         self.LogWarning("Could not re-enable the master ip on"
4558                         " the master, please restart manually: %s",
4559                         result.fail_msg)
4560
4561
4562 def _UploadHelper(lu, nodes, fname):
4563   """Helper for uploading a file and showing warnings.
4564
4565   """
4566   if os.path.exists(fname):
4567     result = lu.rpc.call_upload_file(nodes, fname)
4568     for to_node, to_result in result.items():
4569       msg = to_result.fail_msg
4570       if msg:
4571         msg = ("Copy of file %s to node %s failed: %s" %
4572                (fname, to_node, msg))
4573         lu.LogWarning(msg)
4574
4575
4576 def _ComputeAncillaryFiles(cluster, redist):
4577   """Compute files external to Ganeti which need to be consistent.
4578
4579   @type redist: boolean
4580   @param redist: Whether to include files which need to be redistributed
4581
4582   """
4583   # Compute files for all nodes
4584   files_all = set([
4585     pathutils.SSH_KNOWN_HOSTS_FILE,
4586     pathutils.CONFD_HMAC_KEY,
4587     pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4588     pathutils.SPICE_CERT_FILE,
4589     pathutils.SPICE_CACERT_FILE,
4590     pathutils.RAPI_USERS_FILE,
4591     ])
4592
4593   if redist:
4594     # we need to ship at least the RAPI certificate
4595     files_all.add(pathutils.RAPI_CERT_FILE)
4596   else:
4597     files_all.update(pathutils.ALL_CERT_FILES)
4598     files_all.update(ssconf.SimpleStore().GetFileList())
4599
4600   if cluster.modify_etc_hosts:
4601     files_all.add(pathutils.ETC_HOSTS)
4602
4603   if cluster.use_external_mip_script:
4604     files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4605
4606   # Files which are optional, these must:
4607   # - be present in one other category as well
4608   # - either exist or not exist on all nodes of that category (mc, vm all)
4609   files_opt = set([
4610     pathutils.RAPI_USERS_FILE,
4611     ])
4612
4613   # Files which should only be on master candidates
4614   files_mc = set()
4615
4616   if not redist:
4617     files_mc.add(pathutils.CLUSTER_CONF_FILE)
4618
4619   # File storage
4620   if (not redist and
4621       (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4622     files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4623     files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4624
4625   # Files which should only be on VM-capable nodes
4626   files_vm = set(
4627     filename
4628     for hv_name in cluster.enabled_hypervisors
4629     for filename in
4630       hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4631
4632   files_opt |= set(
4633     filename
4634     for hv_name in cluster.enabled_hypervisors
4635     for filename in
4636       hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4637
4638   # Filenames in each category must be unique
4639   all_files_set = files_all | files_mc | files_vm
4640   assert (len(all_files_set) ==
4641           sum(map(len, [files_all, files_mc, files_vm]))), \
4642          "Found file listed in more than one file list"
4643
4644   # Optional files must be present in one other category
4645   assert all_files_set.issuperset(files_opt), \
4646          "Optional file not in a different required list"
4647
4648   # This one file should never ever be re-distributed via RPC
4649   assert not (redist and
4650               pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4651
4652   return (files_all, files_opt, files_mc, files_vm)
4653
4654
4655 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4656   """Distribute additional files which are part of the cluster configuration.
4657
4658   ConfigWriter takes care of distributing the config and ssconf files, but
4659   there are more files which should be distributed to all nodes. This function
4660   makes sure those are copied.
4661
4662   @param lu: calling logical unit
4663   @param additional_nodes: list of nodes not in the config to distribute to
4664   @type additional_vm: boolean
4665   @param additional_vm: whether the additional nodes are vm-capable or not
4666
4667   """
4668   # Gather target nodes
4669   cluster = lu.cfg.GetClusterInfo()
4670   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4671
4672   online_nodes = lu.cfg.GetOnlineNodeList()
4673   online_set = frozenset(online_nodes)
4674   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4675
4676   if additional_nodes is not None:
4677     online_nodes.extend(additional_nodes)
4678     if additional_vm:
4679       vm_nodes.extend(additional_nodes)
4680
4681   # Never distribute to master node
4682   for nodelist in [online_nodes, vm_nodes]:
4683     if master_info.name in nodelist:
4684       nodelist.remove(master_info.name)
4685
4686   # Gather file lists
4687   (files_all, _, files_mc, files_vm) = \
4688     _ComputeAncillaryFiles(cluster, True)
4689
4690   # Never re-distribute configuration file from here
4691   assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4692               pathutils.CLUSTER_CONF_FILE in files_vm)
4693   assert not files_mc, "Master candidates not handled in this function"
4694
4695   filemap = [
4696     (online_nodes, files_all),
4697     (vm_nodes, files_vm),
4698     ]
4699
4700   # Upload the files
4701   for (node_list, files) in filemap:
4702     for fname in files:
4703       _UploadHelper(lu, node_list, fname)
4704
4705
4706 class LUClusterRedistConf(NoHooksLU):
4707   """Force the redistribution of cluster configuration.
4708
4709   This is a very simple LU.
4710
4711   """
4712   REQ_BGL = False
4713
4714   def ExpandNames(self):
4715     self.needed_locks = {
4716       locking.LEVEL_NODE: locking.ALL_SET,
4717       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4718     }
4719     self.share_locks = _ShareAll()
4720
4721   def Exec(self, feedback_fn):
4722     """Redistribute the configuration.
4723
4724     """
4725     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4726     _RedistributeAncillaryFiles(self)
4727
4728
4729 class LUClusterActivateMasterIp(NoHooksLU):
4730   """Activate the master IP on the master node.
4731
4732   """
4733   def Exec(self, feedback_fn):
4734     """Activate the master IP.
4735
4736     """
4737     master_params = self.cfg.GetMasterNetworkParameters()
4738     ems = self.cfg.GetUseExternalMipScript()
4739     result = self.rpc.call_node_activate_master_ip(master_params.name,
4740                                                    master_params, ems)
4741     result.Raise("Could not activate the master IP")
4742
4743
4744 class LUClusterDeactivateMasterIp(NoHooksLU):
4745   """Deactivate the master IP on the master node.
4746
4747   """
4748   def Exec(self, feedback_fn):
4749     """Deactivate the master IP.
4750
4751     """
4752     master_params = self.cfg.GetMasterNetworkParameters()
4753     ems = self.cfg.GetUseExternalMipScript()
4754     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4755                                                      master_params, ems)
4756     result.Raise("Could not deactivate the master IP")
4757
4758
4759 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4760   """Sleep and poll for an instance's disk to sync.
4761
4762   """
4763   if not instance.disks or disks is not None and not disks:
4764     return True
4765
4766   disks = _ExpandCheckDisks(instance, disks)
4767
4768   if not oneshot:
4769     lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4770
4771   node = instance.primary_node
4772
4773   for dev in disks:
4774     lu.cfg.SetDiskID(dev, node)
4775
4776   # TODO: Convert to utils.Retry
4777
4778   retries = 0
4779   degr_retries = 10 # in seconds, as we sleep 1 second each time
4780   while True:
4781     max_time = 0
4782     done = True
4783     cumul_degraded = False
4784     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4785     msg = rstats.fail_msg
4786     if msg:
4787       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4788       retries += 1
4789       if retries >= 10:
4790         raise errors.RemoteError("Can't contact node %s for mirror data,"
4791                                  " aborting." % node)
4792       time.sleep(6)
4793       continue
4794     rstats = rstats.payload
4795     retries = 0
4796     for i, mstat in enumerate(rstats):
4797       if mstat is None:
4798         lu.LogWarning("Can't compute data for node %s/%s",
4799                            node, disks[i].iv_name)
4800         continue
4801
4802       cumul_degraded = (cumul_degraded or
4803                         (mstat.is_degraded and mstat.sync_percent is None))
4804       if mstat.sync_percent is not None:
4805         done = False
4806         if mstat.estimated_time is not None:
4807           rem_time = ("%s remaining (estimated)" %
4808                       utils.FormatSeconds(mstat.estimated_time))
4809           max_time = mstat.estimated_time
4810         else:
4811           rem_time = "no time estimate"
4812         lu.LogInfo("- device %s: %5.2f%% done, %s",
4813                    disks[i].iv_name, mstat.sync_percent, rem_time)
4814
4815     # if we're done but degraded, let's do a few small retries, to
4816     # make sure we see a stable and not transient situation; therefore
4817     # we force restart of the loop
4818     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4819       logging.info("Degraded disks found, %d retries left", degr_retries)
4820       degr_retries -= 1
4821       time.sleep(1)
4822       continue
4823
4824     if done or oneshot:
4825       break
4826
4827     time.sleep(min(60, max_time))
4828
4829   if done:
4830     lu.LogInfo("Instance %s's disks are in sync", instance.name)
4831
4832   return not cumul_degraded
4833
4834
4835 def _BlockdevFind(lu, node, dev, instance):
4836   """Wrapper around call_blockdev_find to annotate diskparams.
4837
4838   @param lu: A reference to the lu object
4839   @param node: The node to call out
4840   @param dev: The device to find
4841   @param instance: The instance object the device belongs to
4842   @returns The result of the rpc call
4843
4844   """
4845   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4846   return lu.rpc.call_blockdev_find(node, disk)
4847
4848
4849 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4850   """Wrapper around L{_CheckDiskConsistencyInner}.
4851
4852   """
4853   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4854   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4855                                     ldisk=ldisk)
4856
4857
4858 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4859                                ldisk=False):
4860   """Check that mirrors are not degraded.
4861
4862   @attention: The device has to be annotated already.
4863
4864   The ldisk parameter, if True, will change the test from the
4865   is_degraded attribute (which represents overall non-ok status for
4866   the device(s)) to the ldisk (representing the local storage status).
4867
4868   """
4869   lu.cfg.SetDiskID(dev, node)
4870
4871   result = True
4872
4873   if on_primary or dev.AssembleOnSecondary():
4874     rstats = lu.rpc.call_blockdev_find(node, dev)
4875     msg = rstats.fail_msg
4876     if msg:
4877       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4878       result = False
4879     elif not rstats.payload:
4880       lu.LogWarning("Can't find disk on node %s", node)
4881       result = False
4882     else:
4883       if ldisk:
4884         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4885       else:
4886         result = result and not rstats.payload.is_degraded
4887
4888   if dev.children:
4889     for child in dev.children:
4890       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4891                                                      on_primary)
4892
4893   return result
4894
4895
4896 class LUOobCommand(NoHooksLU):
4897   """Logical unit for OOB handling.
4898
4899   """
4900   REQ_BGL = False
4901   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4902
4903   def ExpandNames(self):
4904     """Gather locks we need.
4905
4906     """
4907     if self.op.node_names:
4908       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4909       lock_names = self.op.node_names
4910     else:
4911       lock_names = locking.ALL_SET
4912
4913     self.needed_locks = {
4914       locking.LEVEL_NODE: lock_names,
4915       }
4916
4917     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4918
4919     if not self.op.node_names:
4920       # Acquire node allocation lock only if all nodes are affected
4921       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4922
4923   def CheckPrereq(self):
4924     """Check prerequisites.
4925
4926     This checks:
4927      - the node exists in the configuration
4928      - OOB is supported
4929
4930     Any errors are signaled by raising errors.OpPrereqError.
4931
4932     """
4933     self.nodes = []
4934     self.master_node = self.cfg.GetMasterNode()
4935
4936     assert self.op.power_delay >= 0.0
4937
4938     if self.op.node_names:
4939       if (self.op.command in self._SKIP_MASTER and
4940           self.master_node in self.op.node_names):
4941         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4942         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4943
4944         if master_oob_handler:
4945           additional_text = ("run '%s %s %s' if you want to operate on the"
4946                              " master regardless") % (master_oob_handler,
4947                                                       self.op.command,
4948                                                       self.master_node)
4949         else:
4950           additional_text = "it does not support out-of-band operations"
4951
4952         raise errors.OpPrereqError(("Operating on the master node %s is not"
4953                                     " allowed for %s; %s") %
4954                                    (self.master_node, self.op.command,
4955                                     additional_text), errors.ECODE_INVAL)
4956     else:
4957       self.op.node_names = self.cfg.GetNodeList()
4958       if self.op.command in self._SKIP_MASTER:
4959         self.op.node_names.remove(self.master_node)
4960
4961     if self.op.command in self._SKIP_MASTER:
4962       assert self.master_node not in self.op.node_names
4963
4964     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4965       if node is None:
4966         raise errors.OpPrereqError("Node %s not found" % node_name,
4967                                    errors.ECODE_NOENT)
4968       else:
4969         self.nodes.append(node)
4970
4971       if (not self.op.ignore_status and
4972           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4973         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4974                                     " not marked offline") % node_name,
4975                                    errors.ECODE_STATE)
4976
4977   def Exec(self, feedback_fn):
4978     """Execute OOB and return result if we expect any.
4979
4980     """
4981     master_node = self.master_node
4982     ret = []
4983
4984     for idx, node in enumerate(utils.NiceSort(self.nodes,
4985                                               key=lambda node: node.name)):
4986       node_entry = [(constants.RS_NORMAL, node.name)]
4987       ret.append(node_entry)
4988
4989       oob_program = _SupportsOob(self.cfg, node)
4990
4991       if not oob_program:
4992         node_entry.append((constants.RS_UNAVAIL, None))
4993         continue
4994
4995       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4996                    self.op.command, oob_program, node.name)
4997       result = self.rpc.call_run_oob(master_node, oob_program,
4998                                      self.op.command, node.name,
4999                                      self.op.timeout)
5000
5001       if result.fail_msg:
5002         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5003                         node.name, result.fail_msg)
5004         node_entry.append((constants.RS_NODATA, None))
5005       else:
5006         try:
5007           self._CheckPayload(result)
5008         except errors.OpExecError, err:
5009           self.LogWarning("Payload returned by node '%s' is not valid: %s",
5010                           node.name, err)
5011           node_entry.append((constants.RS_NODATA, None))
5012         else:
5013           if self.op.command == constants.OOB_HEALTH:
5014             # For health we should log important events
5015             for item, status in result.payload:
5016               if status in [constants.OOB_STATUS_WARNING,
5017                             constants.OOB_STATUS_CRITICAL]:
5018                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5019                                 item, node.name, status)
5020
5021           if self.op.command == constants.OOB_POWER_ON:
5022             node.powered = True
5023           elif self.op.command == constants.OOB_POWER_OFF:
5024             node.powered = False
5025           elif self.op.command == constants.OOB_POWER_STATUS:
5026             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5027             if powered != node.powered:
5028               logging.warning(("Recorded power state (%s) of node '%s' does not"
5029                                " match actual power state (%s)"), node.powered,
5030                               node.name, powered)
5031
5032           # For configuration changing commands we should update the node
5033           if self.op.command in (constants.OOB_POWER_ON,
5034                                  constants.OOB_POWER_OFF):
5035             self.cfg.Update(node, feedback_fn)
5036
5037           node_entry.append((constants.RS_NORMAL, result.payload))
5038
5039           if (self.op.command == constants.OOB_POWER_ON and
5040               idx < len(self.nodes) - 1):
5041             time.sleep(self.op.power_delay)
5042
5043     return ret
5044
5045   def _CheckPayload(self, result):
5046     """Checks if the payload is valid.
5047
5048     @param result: RPC result
5049     @raises errors.OpExecError: If payload is not valid
5050
5051     """
5052     errs = []
5053     if self.op.command == constants.OOB_HEALTH:
5054       if not isinstance(result.payload, list):
5055         errs.append("command 'health' is expected to return a list but got %s" %
5056                     type(result.payload))
5057       else:
5058         for item, status in result.payload:
5059           if status not in constants.OOB_STATUSES:
5060             errs.append("health item '%s' has invalid status '%s'" %
5061                         (item, status))
5062
5063     if self.op.command == constants.OOB_POWER_STATUS:
5064       if not isinstance(result.payload, dict):
5065         errs.append("power-status is expected to return a dict but got %s" %
5066                     type(result.payload))
5067
5068     if self.op.command in [
5069       constants.OOB_POWER_ON,
5070       constants.OOB_POWER_OFF,
5071       constants.OOB_POWER_CYCLE,
5072       ]:
5073       if result.payload is not None:
5074         errs.append("%s is expected to not return payload but got '%s'" %
5075                     (self.op.command, result.payload))
5076
5077     if errs:
5078       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5079                                utils.CommaJoin(errs))
5080
5081
5082 class _OsQuery(_QueryBase):
5083   FIELDS = query.OS_FIELDS
5084
5085   def ExpandNames(self, lu):
5086     # Lock all nodes in shared mode
5087     # Temporary removal of locks, should be reverted later
5088     # TODO: reintroduce locks when they are lighter-weight
5089     lu.needed_locks = {}
5090     #self.share_locks[locking.LEVEL_NODE] = 1
5091     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5092
5093     # The following variables interact with _QueryBase._GetNames
5094     if self.names:
5095       self.wanted = self.names
5096     else:
5097       self.wanted = locking.ALL_SET
5098
5099     self.do_locking = self.use_locking
5100
5101   def DeclareLocks(self, lu, level):
5102     pass
5103
5104   @staticmethod
5105   def _DiagnoseByOS(rlist):
5106     """Remaps a per-node return list into an a per-os per-node dictionary
5107
5108     @param rlist: a map with node names as keys and OS objects as values
5109
5110     @rtype: dict
5111     @return: a dictionary with osnames as keys and as value another
5112         map, with nodes as keys and tuples of (path, status, diagnose,
5113         variants, parameters, api_versions) as values, eg::
5114
5115           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5116                                      (/srv/..., False, "invalid api")],
5117                            "node2": [(/srv/..., True, "", [], [])]}
5118           }
5119
5120     """
5121     all_os = {}
5122     # we build here the list of nodes that didn't fail the RPC (at RPC
5123     # level), so that nodes with a non-responding node daemon don't
5124     # make all OSes invalid
5125     good_nodes = [node_name for node_name in rlist
5126                   if not rlist[node_name].fail_msg]
5127     for node_name, nr in rlist.items():
5128       if nr.fail_msg or not nr.payload:
5129         continue
5130       for (name, path, status, diagnose, variants,
5131            params, api_versions) in nr.payload:
5132         if name not in all_os:
5133           # build a list of nodes for this os containing empty lists
5134           # for each node in node_list
5135           all_os[name] = {}
5136           for nname in good_nodes:
5137             all_os[name][nname] = []
5138         # convert params from [name, help] to (name, help)
5139         params = [tuple(v) for v in params]
5140         all_os[name][node_name].append((path, status, diagnose,
5141                                         variants, params, api_versions))
5142     return all_os
5143
5144   def _GetQueryData(self, lu):
5145     """Computes the list of nodes and their attributes.
5146
5147     """
5148     # Locking is not used
5149     assert not (compat.any(lu.glm.is_owned(level)
5150                            for level in locking.LEVELS
5151                            if level != locking.LEVEL_CLUSTER) or
5152                 self.do_locking or self.use_locking)
5153
5154     valid_nodes = [node.name
5155                    for node in lu.cfg.GetAllNodesInfo().values()
5156                    if not node.offline and node.vm_capable]
5157     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5158     cluster = lu.cfg.GetClusterInfo()
5159
5160     data = {}
5161
5162     for (os_name, os_data) in pol.items():
5163       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5164                           hidden=(os_name in cluster.hidden_os),
5165                           blacklisted=(os_name in cluster.blacklisted_os))
5166
5167       variants = set()
5168       parameters = set()
5169       api_versions = set()
5170
5171       for idx, osl in enumerate(os_data.values()):
5172         info.valid = bool(info.valid and osl and osl[0][1])
5173         if not info.valid:
5174           break
5175
5176         (node_variants, node_params, node_api) = osl[0][3:6]
5177         if idx == 0:
5178           # First entry
5179           variants.update(node_variants)
5180           parameters.update(node_params)
5181           api_versions.update(node_api)
5182         else:
5183           # Filter out inconsistent values
5184           variants.intersection_update(node_variants)
5185           parameters.intersection_update(node_params)
5186           api_versions.intersection_update(node_api)
5187
5188       info.variants = list(variants)
5189       info.parameters = list(parameters)
5190       info.api_versions = list(api_versions)
5191
5192       data[os_name] = info
5193
5194     # Prepare data in requested order
5195     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5196             if name in data]
5197
5198
5199 class LUOsDiagnose(NoHooksLU):
5200   """Logical unit for OS diagnose/query.
5201
5202   """
5203   REQ_BGL = False
5204
5205   @staticmethod
5206   def _BuildFilter(fields, names):
5207     """Builds a filter for querying OSes.
5208
5209     """
5210     name_filter = qlang.MakeSimpleFilter("name", names)
5211
5212     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5213     # respective field is not requested
5214     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5215                      for fname in ["hidden", "blacklisted"]
5216                      if fname not in fields]
5217     if "valid" not in fields:
5218       status_filter.append([qlang.OP_TRUE, "valid"])
5219
5220     if status_filter:
5221       status_filter.insert(0, qlang.OP_AND)
5222     else:
5223       status_filter = None
5224
5225     if name_filter and status_filter:
5226       return [qlang.OP_AND, name_filter, status_filter]
5227     elif name_filter:
5228       return name_filter
5229     else:
5230       return status_filter
5231
5232   def CheckArguments(self):
5233     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5234                        self.op.output_fields, False)
5235
5236   def ExpandNames(self):
5237     self.oq.ExpandNames(self)
5238
5239   def Exec(self, feedback_fn):
5240     return self.oq.OldStyleQuery(self)
5241
5242
5243 class _ExtStorageQuery(_QueryBase):
5244   FIELDS = query.EXTSTORAGE_FIELDS
5245
5246   def ExpandNames(self, lu):
5247     # Lock all nodes in shared mode
5248     # Temporary removal of locks, should be reverted later
5249     # TODO: reintroduce locks when they are lighter-weight
5250     lu.needed_locks = {}
5251     #self.share_locks[locking.LEVEL_NODE] = 1
5252     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5253
5254     # The following variables interact with _QueryBase._GetNames
5255     if self.names:
5256       self.wanted = self.names
5257     else:
5258       self.wanted = locking.ALL_SET
5259
5260     self.do_locking = self.use_locking
5261
5262   def DeclareLocks(self, lu, level):
5263     pass
5264
5265   @staticmethod
5266   def _DiagnoseByProvider(rlist):
5267     """Remaps a per-node return list into an a per-provider per-node dictionary
5268
5269     @param rlist: a map with node names as keys and ExtStorage objects as values
5270
5271     @rtype: dict
5272     @return: a dictionary with extstorage providers as keys and as
5273         value another map, with nodes as keys and tuples of
5274         (path, status, diagnose, parameters) as values, eg::
5275
5276           {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5277                          "node2": [(/srv/..., False, "missing file")]
5278                          "node3": [(/srv/..., True, "", [])]
5279           }
5280
5281     """
5282     all_es = {}
5283     # we build here the list of nodes that didn't fail the RPC (at RPC
5284     # level), so that nodes with a non-responding node daemon don't
5285     # make all OSes invalid
5286     good_nodes = [node_name for node_name in rlist
5287                   if not rlist[node_name].fail_msg]
5288     for node_name, nr in rlist.items():
5289       if nr.fail_msg or not nr.payload:
5290         continue
5291       for (name, path, status, diagnose, params) in nr.payload:
5292         if name not in all_es:
5293           # build a list of nodes for this os containing empty lists
5294           # for each node in node_list
5295           all_es[name] = {}
5296           for nname in good_nodes:
5297             all_es[name][nname] = []
5298         # convert params from [name, help] to (name, help)
5299         params = [tuple(v) for v in params]
5300         all_es[name][node_name].append((path, status, diagnose, params))
5301     return all_es
5302
5303   def _GetQueryData(self, lu):
5304     """Computes the list of nodes and their attributes.
5305
5306     """
5307     # Locking is not used
5308     assert not (compat.any(lu.glm.is_owned(level)
5309                            for level in locking.LEVELS
5310                            if level != locking.LEVEL_CLUSTER) or
5311                 self.do_locking or self.use_locking)
5312
5313     valid_nodes = [node.name
5314                    for node in lu.cfg.GetAllNodesInfo().values()
5315                    if not node.offline and node.vm_capable]
5316     pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5317
5318     data = {}
5319
5320     nodegroup_list = lu.cfg.GetNodeGroupList()
5321
5322     for (es_name, es_data) in pol.items():
5323       # For every provider compute the nodegroup validity.
5324       # To do this we need to check the validity of each node in es_data
5325       # and then construct the corresponding nodegroup dict:
5326       #      { nodegroup1: status
5327       #        nodegroup2: status
5328       #      }
5329       ndgrp_data = {}
5330       for nodegroup in nodegroup_list:
5331         ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5332
5333         nodegroup_nodes = ndgrp.members
5334         nodegroup_name = ndgrp.name
5335         node_statuses = []
5336
5337         for node in nodegroup_nodes:
5338           if node in valid_nodes:
5339             if es_data[node] != []:
5340               node_status = es_data[node][0][1]
5341               node_statuses.append(node_status)
5342             else:
5343               node_statuses.append(False)
5344
5345         if False in node_statuses:
5346           ndgrp_data[nodegroup_name] = False
5347         else:
5348           ndgrp_data[nodegroup_name] = True
5349
5350       # Compute the provider's parameters
5351       parameters = set()
5352       for idx, esl in enumerate(es_data.values()):
5353         valid = bool(esl and esl[0][1])
5354         if not valid:
5355           break
5356
5357         node_params = esl[0][3]
5358         if idx == 0:
5359           # First entry
5360           parameters.update(node_params)
5361         else:
5362           # Filter out inconsistent values
5363           parameters.intersection_update(node_params)
5364
5365       params = list(parameters)
5366
5367       # Now fill all the info for this provider
5368       info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5369                                   nodegroup_status=ndgrp_data,
5370                                   parameters=params)
5371
5372       data[es_name] = info
5373
5374     # Prepare data in requested order
5375     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5376             if name in data]
5377
5378
5379 class LUExtStorageDiagnose(NoHooksLU):
5380   """Logical unit for ExtStorage diagnose/query.
5381
5382   """
5383   REQ_BGL = False
5384
5385   def CheckArguments(self):
5386     self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5387                                self.op.output_fields, False)
5388
5389   def ExpandNames(self):
5390     self.eq.ExpandNames(self)
5391
5392   def Exec(self, feedback_fn):
5393     return self.eq.OldStyleQuery(self)
5394
5395
5396 class LUNodeRemove(LogicalUnit):
5397   """Logical unit for removing a node.
5398
5399   """
5400   HPATH = "node-remove"
5401   HTYPE = constants.HTYPE_NODE
5402
5403   def BuildHooksEnv(self):
5404     """Build hooks env.
5405
5406     """
5407     return {
5408       "OP_TARGET": self.op.node_name,
5409       "NODE_NAME": self.op.node_name,
5410       }
5411
5412   def BuildHooksNodes(self):
5413     """Build hooks nodes.
5414
5415     This doesn't run on the target node in the pre phase as a failed
5416     node would then be impossible to remove.
5417
5418     """
5419     all_nodes = self.cfg.GetNodeList()
5420     try:
5421       all_nodes.remove(self.op.node_name)
5422     except ValueError:
5423       pass
5424     return (all_nodes, all_nodes)
5425
5426   def CheckPrereq(self):
5427     """Check prerequisites.
5428
5429     This checks:
5430      - the node exists in the configuration
5431      - it does not have primary or secondary instances
5432      - it's not the master
5433
5434     Any errors are signaled by raising errors.OpPrereqError.
5435
5436     """
5437     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5438     node = self.cfg.GetNodeInfo(self.op.node_name)
5439     assert node is not None
5440
5441     masternode = self.cfg.GetMasterNode()
5442     if node.name == masternode:
5443       raise errors.OpPrereqError("Node is the master node, failover to another"
5444                                  " node is required", errors.ECODE_INVAL)
5445
5446     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5447       if node.name in instance.all_nodes:
5448         raise errors.OpPrereqError("Instance %s is still running on the node,"
5449                                    " please remove first" % instance_name,
5450                                    errors.ECODE_INVAL)
5451     self.op.node_name = node.name
5452     self.node = node
5453
5454   def Exec(self, feedback_fn):
5455     """Removes the node from the cluster.
5456
5457     """
5458     node = self.node
5459     logging.info("Stopping the node daemon and removing configs from node %s",
5460                  node.name)
5461
5462     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5463
5464     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5465       "Not owning BGL"
5466
5467     # Promote nodes to master candidate as needed
5468     _AdjustCandidatePool(self, exceptions=[node.name])
5469     self.context.RemoveNode(node.name)
5470
5471     # Run post hooks on the node before it's removed
5472     _RunPostHook(self, node.name)
5473
5474     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5475     msg = result.fail_msg
5476     if msg:
5477       self.LogWarning("Errors encountered on the remote node while leaving"
5478                       " the cluster: %s", msg)
5479
5480     # Remove node from our /etc/hosts
5481     if self.cfg.GetClusterInfo().modify_etc_hosts:
5482       master_node = self.cfg.GetMasterNode()
5483       result = self.rpc.call_etc_hosts_modify(master_node,
5484                                               constants.ETC_HOSTS_REMOVE,
5485                                               node.name, None)
5486       result.Raise("Can't update hosts file with new host data")
5487       _RedistributeAncillaryFiles(self)
5488
5489
5490 class _NodeQuery(_QueryBase):
5491   FIELDS = query.NODE_FIELDS
5492
5493   def ExpandNames(self, lu):
5494     lu.needed_locks = {}
5495     lu.share_locks = _ShareAll()
5496
5497     if self.names:
5498       self.wanted = _GetWantedNodes(lu, self.names)
5499     else:
5500       self.wanted = locking.ALL_SET
5501
5502     self.do_locking = (self.use_locking and
5503                        query.NQ_LIVE in self.requested_data)
5504
5505     if self.do_locking:
5506       # If any non-static field is requested we need to lock the nodes
5507       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5508       lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5509
5510   def DeclareLocks(self, lu, level):
5511     pass
5512
5513   def _GetQueryData(self, lu):
5514     """Computes the list of nodes and their attributes.
5515
5516     """
5517     all_info = lu.cfg.GetAllNodesInfo()
5518
5519     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5520
5521     # Gather data as requested
5522     if query.NQ_LIVE in self.requested_data:
5523       # filter out non-vm_capable nodes
5524       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5525
5526       es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5527       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5528                                         [lu.cfg.GetHypervisorType()], es_flags)
5529       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5530                        for (name, nresult) in node_data.items()
5531                        if not nresult.fail_msg and nresult.payload)
5532     else:
5533       live_data = None
5534
5535     if query.NQ_INST in self.requested_data:
5536       node_to_primary = dict([(name, set()) for name in nodenames])
5537       node_to_secondary = dict([(name, set()) for name in nodenames])
5538
5539       inst_data = lu.cfg.GetAllInstancesInfo()
5540
5541       for inst in inst_data.values():
5542         if inst.primary_node in node_to_primary:
5543           node_to_primary[inst.primary_node].add(inst.name)
5544         for secnode in inst.secondary_nodes:
5545           if secnode in node_to_secondary:
5546             node_to_secondary[secnode].add(inst.name)
5547     else:
5548       node_to_primary = None
5549       node_to_secondary = None
5550
5551     if query.NQ_OOB in self.requested_data:
5552       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5553                          for name, node in all_info.iteritems())
5554     else:
5555       oob_support = None
5556
5557     if query.NQ_GROUP in self.requested_data:
5558       groups = lu.cfg.GetAllNodeGroupsInfo()
5559     else:
5560       groups = {}
5561
5562     return query.NodeQueryData([all_info[name] for name in nodenames],
5563                                live_data, lu.cfg.GetMasterNode(),
5564                                node_to_primary, node_to_secondary, groups,
5565                                oob_support, lu.cfg.GetClusterInfo())
5566
5567
5568 class LUNodeQuery(NoHooksLU):
5569   """Logical unit for querying nodes.
5570
5571   """
5572   # pylint: disable=W0142
5573   REQ_BGL = False
5574
5575   def CheckArguments(self):
5576     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5577                          self.op.output_fields, self.op.use_locking)
5578
5579   def ExpandNames(self):
5580     self.nq.ExpandNames(self)
5581
5582   def DeclareLocks(self, level):
5583     self.nq.DeclareLocks(self, level)
5584
5585   def Exec(self, feedback_fn):
5586     return self.nq.OldStyleQuery(self)
5587
5588
5589 class LUNodeQueryvols(NoHooksLU):
5590   """Logical unit for getting volumes on node(s).
5591
5592   """
5593   REQ_BGL = False
5594   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5595   _FIELDS_STATIC = utils.FieldSet("node")
5596
5597   def CheckArguments(self):
5598     _CheckOutputFields(static=self._FIELDS_STATIC,
5599                        dynamic=self._FIELDS_DYNAMIC,
5600                        selected=self.op.output_fields)
5601
5602   def ExpandNames(self):
5603     self.share_locks = _ShareAll()
5604
5605     if self.op.nodes:
5606       self.needed_locks = {
5607         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5608         }
5609     else:
5610       self.needed_locks = {
5611         locking.LEVEL_NODE: locking.ALL_SET,
5612         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5613         }
5614
5615   def Exec(self, feedback_fn):
5616     """Computes the list of nodes and their attributes.
5617
5618     """
5619     nodenames = self.owned_locks(locking.LEVEL_NODE)
5620     volumes = self.rpc.call_node_volumes(nodenames)
5621
5622     ilist = self.cfg.GetAllInstancesInfo()
5623     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5624
5625     output = []
5626     for node in nodenames:
5627       nresult = volumes[node]
5628       if nresult.offline:
5629         continue
5630       msg = nresult.fail_msg
5631       if msg:
5632         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5633         continue
5634
5635       node_vols = sorted(nresult.payload,
5636                          key=operator.itemgetter("dev"))
5637
5638       for vol in node_vols:
5639         node_output = []
5640         for field in self.op.output_fields:
5641           if field == "node":
5642             val = node
5643           elif field == "phys":
5644             val = vol["dev"]
5645           elif field == "vg":
5646             val = vol["vg"]
5647           elif field == "name":
5648             val = vol["name"]
5649           elif field == "size":
5650             val = int(float(vol["size"]))
5651           elif field == "instance":
5652             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5653           else:
5654             raise errors.ParameterError(field)
5655           node_output.append(str(val))
5656
5657         output.append(node_output)
5658
5659     return output
5660
5661
5662 class LUNodeQueryStorage(NoHooksLU):
5663   """Logical unit for getting information on storage units on node(s).
5664
5665   """
5666   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5667   REQ_BGL = False
5668
5669   def CheckArguments(self):
5670     _CheckOutputFields(static=self._FIELDS_STATIC,
5671                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5672                        selected=self.op.output_fields)
5673
5674   def ExpandNames(self):
5675     self.share_locks = _ShareAll()
5676
5677     if self.op.nodes:
5678       self.needed_locks = {
5679         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5680         }
5681     else:
5682       self.needed_locks = {
5683         locking.LEVEL_NODE: locking.ALL_SET,
5684         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5685         }
5686
5687   def Exec(self, feedback_fn):
5688     """Computes the list of nodes and their attributes.
5689
5690     """
5691     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5692
5693     # Always get name to sort by
5694     if constants.SF_NAME in self.op.output_fields:
5695       fields = self.op.output_fields[:]
5696     else:
5697       fields = [constants.SF_NAME] + self.op.output_fields
5698
5699     # Never ask for node or type as it's only known to the LU
5700     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5701       while extra in fields:
5702         fields.remove(extra)
5703
5704     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5705     name_idx = field_idx[constants.SF_NAME]
5706
5707     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5708     data = self.rpc.call_storage_list(self.nodes,
5709                                       self.op.storage_type, st_args,
5710                                       self.op.name, fields)
5711
5712     result = []
5713
5714     for node in utils.NiceSort(self.nodes):
5715       nresult = data[node]
5716       if nresult.offline:
5717         continue
5718
5719       msg = nresult.fail_msg
5720       if msg:
5721         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5722         continue
5723
5724       rows = dict([(row[name_idx], row) for row in nresult.payload])
5725
5726       for name in utils.NiceSort(rows.keys()):
5727         row = rows[name]
5728
5729         out = []
5730
5731         for field in self.op.output_fields:
5732           if field == constants.SF_NODE:
5733             val = node
5734           elif field == constants.SF_TYPE:
5735             val = self.op.storage_type
5736           elif field in field_idx:
5737             val = row[field_idx[field]]
5738           else:
5739             raise errors.ParameterError(field)
5740
5741           out.append(val)
5742
5743         result.append(out)
5744
5745     return result
5746
5747
5748 class _InstanceQuery(_QueryBase):
5749   FIELDS = query.INSTANCE_FIELDS
5750
5751   def ExpandNames(self, lu):
5752     lu.needed_locks = {}
5753     lu.share_locks = _ShareAll()
5754
5755     if self.names:
5756       self.wanted = _GetWantedInstances(lu, self.names)
5757     else:
5758       self.wanted = locking.ALL_SET
5759
5760     self.do_locking = (self.use_locking and
5761                        query.IQ_LIVE in self.requested_data)
5762     if self.do_locking:
5763       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5764       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5765       lu.needed_locks[locking.LEVEL_NODE] = []
5766       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5767
5768     self.do_grouplocks = (self.do_locking and
5769                           query.IQ_NODES in self.requested_data)
5770
5771   def DeclareLocks(self, lu, level):
5772     if self.do_locking:
5773       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5774         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5775
5776         # Lock all groups used by instances optimistically; this requires going
5777         # via the node before it's locked, requiring verification later on
5778         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5779           set(group_uuid
5780               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5781               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5782       elif level == locking.LEVEL_NODE:
5783         lu._LockInstancesNodes() # pylint: disable=W0212
5784
5785   @staticmethod
5786   def _CheckGroupLocks(lu):
5787     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5788     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5789
5790     # Check if node groups for locked instances are still correct
5791     for instance_name in owned_instances:
5792       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5793
5794   def _GetQueryData(self, lu):
5795     """Computes the list of instances and their attributes.
5796
5797     """
5798     if self.do_grouplocks:
5799       self._CheckGroupLocks(lu)
5800
5801     cluster = lu.cfg.GetClusterInfo()
5802     all_info = lu.cfg.GetAllInstancesInfo()
5803
5804     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5805
5806     instance_list = [all_info[name] for name in instance_names]
5807     nodes = frozenset(itertools.chain(*(inst.all_nodes
5808                                         for inst in instance_list)))
5809     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5810     bad_nodes = []
5811     offline_nodes = []
5812     wrongnode_inst = set()
5813
5814     # Gather data as requested
5815     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5816       live_data = {}
5817       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5818       for name in nodes:
5819         result = node_data[name]
5820         if result.offline:
5821           # offline nodes will be in both lists
5822           assert result.fail_msg
5823           offline_nodes.append(name)
5824         if result.fail_msg:
5825           bad_nodes.append(name)
5826         elif result.payload:
5827           for inst in result.payload:
5828             if inst in all_info:
5829               if all_info[inst].primary_node == name:
5830                 live_data.update(result.payload)
5831               else:
5832                 wrongnode_inst.add(inst)
5833             else:
5834               # orphan instance; we don't list it here as we don't
5835               # handle this case yet in the output of instance listing
5836               logging.warning("Orphan instance '%s' found on node %s",
5837                               inst, name)
5838         # else no instance is alive
5839     else:
5840       live_data = {}
5841
5842     if query.IQ_DISKUSAGE in self.requested_data:
5843       gmi = ganeti.masterd.instance
5844       disk_usage = dict((inst.name,
5845                          gmi.ComputeDiskSize(inst.disk_template,
5846                                              [{constants.IDISK_SIZE: disk.size}
5847                                               for disk in inst.disks]))
5848                         for inst in instance_list)
5849     else:
5850       disk_usage = None
5851
5852     if query.IQ_CONSOLE in self.requested_data:
5853       consinfo = {}
5854       for inst in instance_list:
5855         if inst.name in live_data:
5856           # Instance is running
5857           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5858         else:
5859           consinfo[inst.name] = None
5860       assert set(consinfo.keys()) == set(instance_names)
5861     else:
5862       consinfo = None
5863
5864     if query.IQ_NODES in self.requested_data:
5865       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5866                                             instance_list)))
5867       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5868       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5869                     for uuid in set(map(operator.attrgetter("group"),
5870                                         nodes.values())))
5871     else:
5872       nodes = None
5873       groups = None
5874
5875     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5876                                    disk_usage, offline_nodes, bad_nodes,
5877                                    live_data, wrongnode_inst, consinfo,
5878                                    nodes, groups)
5879
5880
5881 class LUQuery(NoHooksLU):
5882   """Query for resources/items of a certain kind.
5883
5884   """
5885   # pylint: disable=W0142
5886   REQ_BGL = False
5887
5888   def CheckArguments(self):
5889     qcls = _GetQueryImplementation(self.op.what)
5890
5891     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5892
5893   def ExpandNames(self):
5894     self.impl.ExpandNames(self)
5895
5896   def DeclareLocks(self, level):
5897     self.impl.DeclareLocks(self, level)
5898
5899   def Exec(self, feedback_fn):
5900     return self.impl.NewStyleQuery(self)
5901
5902
5903 class LUQueryFields(NoHooksLU):
5904   """Query for resources/items of a certain kind.
5905
5906   """
5907   # pylint: disable=W0142
5908   REQ_BGL = False
5909
5910   def CheckArguments(self):
5911     self.qcls = _GetQueryImplementation(self.op.what)
5912
5913   def ExpandNames(self):
5914     self.needed_locks = {}
5915
5916   def Exec(self, feedback_fn):
5917     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5918
5919
5920 class LUNodeModifyStorage(NoHooksLU):
5921   """Logical unit for modifying a storage volume on a node.
5922
5923   """
5924   REQ_BGL = False
5925
5926   def CheckArguments(self):
5927     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5928
5929     storage_type = self.op.storage_type
5930
5931     try:
5932       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5933     except KeyError:
5934       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5935                                  " modified" % storage_type,
5936                                  errors.ECODE_INVAL)
5937
5938     diff = set(self.op.changes.keys()) - modifiable
5939     if diff:
5940       raise errors.OpPrereqError("The following fields can not be modified for"
5941                                  " storage units of type '%s': %r" %
5942                                  (storage_type, list(diff)),
5943                                  errors.ECODE_INVAL)
5944
5945   def ExpandNames(self):
5946     self.needed_locks = {
5947       locking.LEVEL_NODE: self.op.node_name,
5948       }
5949
5950   def Exec(self, feedback_fn):
5951     """Computes the list of nodes and their attributes.
5952
5953     """
5954     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5955     result = self.rpc.call_storage_modify(self.op.node_name,
5956                                           self.op.storage_type, st_args,
5957                                           self.op.name, self.op.changes)
5958     result.Raise("Failed to modify storage unit '%s' on %s" %
5959                  (self.op.name, self.op.node_name))
5960
5961
5962 class LUNodeAdd(LogicalUnit):
5963   """Logical unit for adding node to the cluster.
5964
5965   """
5966   HPATH = "node-add"
5967   HTYPE = constants.HTYPE_NODE
5968   _NFLAGS = ["master_capable", "vm_capable"]
5969
5970   def CheckArguments(self):
5971     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5972     # validate/normalize the node name
5973     self.hostname = netutils.GetHostname(name=self.op.node_name,
5974                                          family=self.primary_ip_family)
5975     self.op.node_name = self.hostname.name
5976
5977     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5978       raise errors.OpPrereqError("Cannot readd the master node",
5979                                  errors.ECODE_STATE)
5980
5981     if self.op.readd and self.op.group:
5982       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5983                                  " being readded", errors.ECODE_INVAL)
5984
5985   def BuildHooksEnv(self):
5986     """Build hooks env.
5987
5988     This will run on all nodes before, and on all nodes + the new node after.
5989
5990     """
5991     return {
5992       "OP_TARGET": self.op.node_name,
5993       "NODE_NAME": self.op.node_name,
5994       "NODE_PIP": self.op.primary_ip,
5995       "NODE_SIP": self.op.secondary_ip,
5996       "MASTER_CAPABLE": str(self.op.master_capable),
5997       "VM_CAPABLE": str(self.op.vm_capable),
5998       }
5999
6000   def BuildHooksNodes(self):
6001     """Build hooks nodes.
6002
6003     """
6004     # Exclude added node
6005     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6006     post_nodes = pre_nodes + [self.op.node_name, ]
6007
6008     return (pre_nodes, post_nodes)
6009
6010   def CheckPrereq(self):
6011     """Check prerequisites.
6012
6013     This checks:
6014      - the new node is not already in the config
6015      - it is resolvable
6016      - its parameters (single/dual homed) matches the cluster
6017
6018     Any errors are signaled by raising errors.OpPrereqError.
6019
6020     """
6021     cfg = self.cfg
6022     hostname = self.hostname
6023     node = hostname.name
6024     primary_ip = self.op.primary_ip = hostname.ip
6025     if self.op.secondary_ip is None:
6026       if self.primary_ip_family == netutils.IP6Address.family:
6027         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6028                                    " IPv4 address must be given as secondary",
6029                                    errors.ECODE_INVAL)
6030       self.op.secondary_ip = primary_ip
6031
6032     secondary_ip = self.op.secondary_ip
6033     if not netutils.IP4Address.IsValid(secondary_ip):
6034       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6035                                  " address" % secondary_ip, errors.ECODE_INVAL)
6036
6037     node_list = cfg.GetNodeList()
6038     if not self.op.readd and node in node_list:
6039       raise errors.OpPrereqError("Node %s is already in the configuration" %
6040                                  node, errors.ECODE_EXISTS)
6041     elif self.op.readd and node not in node_list:
6042       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6043                                  errors.ECODE_NOENT)
6044
6045     self.changed_primary_ip = False
6046
6047     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6048       if self.op.readd and node == existing_node_name:
6049         if existing_node.secondary_ip != secondary_ip:
6050           raise errors.OpPrereqError("Readded node doesn't have the same IP"
6051                                      " address configuration as before",
6052                                      errors.ECODE_INVAL)
6053         if existing_node.primary_ip != primary_ip:
6054           self.changed_primary_ip = True
6055
6056         continue
6057
6058       if (existing_node.primary_ip == primary_ip or
6059           existing_node.secondary_ip == primary_ip or
6060           existing_node.primary_ip == secondary_ip or
6061           existing_node.secondary_ip == secondary_ip):
6062         raise errors.OpPrereqError("New node ip address(es) conflict with"
6063                                    " existing node %s" % existing_node.name,
6064                                    errors.ECODE_NOTUNIQUE)
6065
6066     # After this 'if' block, None is no longer a valid value for the
6067     # _capable op attributes
6068     if self.op.readd:
6069       old_node = self.cfg.GetNodeInfo(node)
6070       assert old_node is not None, "Can't retrieve locked node %s" % node
6071       for attr in self._NFLAGS:
6072         if getattr(self.op, attr) is None:
6073           setattr(self.op, attr, getattr(old_node, attr))
6074     else:
6075       for attr in self._NFLAGS:
6076         if getattr(self.op, attr) is None:
6077           setattr(self.op, attr, True)
6078
6079     if self.op.readd and not self.op.vm_capable:
6080       pri, sec = cfg.GetNodeInstances(node)
6081       if pri or sec:
6082         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6083                                    " flag set to false, but it already holds"
6084                                    " instances" % node,
6085                                    errors.ECODE_STATE)
6086
6087     # check that the type of the node (single versus dual homed) is the
6088     # same as for the master
6089     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6090     master_singlehomed = myself.secondary_ip == myself.primary_ip
6091     newbie_singlehomed = secondary_ip == primary_ip
6092     if master_singlehomed != newbie_singlehomed:
6093       if master_singlehomed:
6094         raise errors.OpPrereqError("The master has no secondary ip but the"
6095                                    " new node has one",
6096                                    errors.ECODE_INVAL)
6097       else:
6098         raise errors.OpPrereqError("The master has a secondary ip but the"
6099                                    " new node doesn't have one",
6100                                    errors.ECODE_INVAL)
6101
6102     # checks reachability
6103     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6104       raise errors.OpPrereqError("Node not reachable by ping",
6105                                  errors.ECODE_ENVIRON)
6106
6107     if not newbie_singlehomed:
6108       # check reachability from my secondary ip to newbie's secondary ip
6109       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6110                               source=myself.secondary_ip):
6111         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6112                                    " based ping to node daemon port",
6113                                    errors.ECODE_ENVIRON)
6114
6115     if self.op.readd:
6116       exceptions = [node]
6117     else:
6118       exceptions = []
6119
6120     if self.op.master_capable:
6121       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6122     else:
6123       self.master_candidate = False
6124
6125     if self.op.readd:
6126       self.new_node = old_node
6127     else:
6128       node_group = cfg.LookupNodeGroup(self.op.group)
6129       self.new_node = objects.Node(name=node,
6130                                    primary_ip=primary_ip,
6131                                    secondary_ip=secondary_ip,
6132                                    master_candidate=self.master_candidate,
6133                                    offline=False, drained=False,
6134                                    group=node_group, ndparams={})
6135
6136     if self.op.ndparams:
6137       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6138
6139     if self.op.hv_state:
6140       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6141
6142     if self.op.disk_state:
6143       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6144
6145     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6146     #       it a property on the base class.
6147     rpcrunner = rpc.DnsOnlyRunner()
6148     result = rpcrunner.call_version([node])[node]
6149     result.Raise("Can't get version information from node %s" % node)
6150     if constants.PROTOCOL_VERSION == result.payload:
6151       logging.info("Communication to node %s fine, sw version %s match",
6152                    node, result.payload)
6153     else:
6154       raise errors.OpPrereqError("Version mismatch master version %s,"
6155                                  " node version %s" %
6156                                  (constants.PROTOCOL_VERSION, result.payload),
6157                                  errors.ECODE_ENVIRON)
6158
6159     vg_name = cfg.GetVGName()
6160     if vg_name is not None:
6161       vparams = {constants.NV_PVLIST: [vg_name]}
6162       excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6163       if self.op.ndparams:
6164         excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6165                                          excl_stor)
6166       cname = self.cfg.GetClusterName()
6167       result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6168       (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6169       if errmsgs:
6170         raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6171                                    "; ".join(errmsgs), errors.ECODE_ENVIRON)
6172
6173   def Exec(self, feedback_fn):
6174     """Adds the new node to the cluster.
6175
6176     """
6177     new_node = self.new_node
6178     node = new_node.name
6179
6180     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6181       "Not owning BGL"
6182
6183     # We adding a new node so we assume it's powered
6184     new_node.powered = True
6185
6186     # for re-adds, reset the offline/drained/master-candidate flags;
6187     # we need to reset here, otherwise offline would prevent RPC calls
6188     # later in the procedure; this also means that if the re-add
6189     # fails, we are left with a non-offlined, broken node
6190     if self.op.readd:
6191       new_node.drained = new_node.offline = False # pylint: disable=W0201
6192       self.LogInfo("Readding a node, the offline/drained flags were reset")
6193       # if we demote the node, we do cleanup later in the procedure
6194       new_node.master_candidate = self.master_candidate
6195       if self.changed_primary_ip:
6196         new_node.primary_ip = self.op.primary_ip
6197
6198     # copy the master/vm_capable flags
6199     for attr in self._NFLAGS:
6200       setattr(new_node, attr, getattr(self.op, attr))
6201
6202     # notify the user about any possible mc promotion
6203     if new_node.master_candidate:
6204       self.LogInfo("Node will be a master candidate")
6205
6206     if self.op.ndparams:
6207       new_node.ndparams = self.op.ndparams
6208     else:
6209       new_node.ndparams = {}
6210
6211     if self.op.hv_state:
6212       new_node.hv_state_static = self.new_hv_state
6213
6214     if self.op.disk_state:
6215       new_node.disk_state_static = self.new_disk_state
6216
6217     # Add node to our /etc/hosts, and add key to known_hosts
6218     if self.cfg.GetClusterInfo().modify_etc_hosts:
6219       master_node = self.cfg.GetMasterNode()
6220       result = self.rpc.call_etc_hosts_modify(master_node,
6221                                               constants.ETC_HOSTS_ADD,
6222                                               self.hostname.name,
6223                                               self.hostname.ip)
6224       result.Raise("Can't update hosts file with new host data")
6225
6226     if new_node.secondary_ip != new_node.primary_ip:
6227       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6228                                False)
6229
6230     node_verify_list = [self.cfg.GetMasterNode()]
6231     node_verify_param = {
6232       constants.NV_NODELIST: ([node], {}),
6233       # TODO: do a node-net-test as well?
6234     }
6235
6236     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6237                                        self.cfg.GetClusterName())
6238     for verifier in node_verify_list:
6239       result[verifier].Raise("Cannot communicate with node %s" % verifier)
6240       nl_payload = result[verifier].payload[constants.NV_NODELIST]
6241       if nl_payload:
6242         for failed in nl_payload:
6243           feedback_fn("ssh/hostname verification failed"
6244                       " (checking from %s): %s" %
6245                       (verifier, nl_payload[failed]))
6246         raise errors.OpExecError("ssh/hostname verification failed")
6247
6248     if self.op.readd:
6249       _RedistributeAncillaryFiles(self)
6250       self.context.ReaddNode(new_node)
6251       # make sure we redistribute the config
6252       self.cfg.Update(new_node, feedback_fn)
6253       # and make sure the new node will not have old files around
6254       if not new_node.master_candidate:
6255         result = self.rpc.call_node_demote_from_mc(new_node.name)
6256         msg = result.fail_msg
6257         if msg:
6258           self.LogWarning("Node failed to demote itself from master"
6259                           " candidate status: %s" % msg)
6260     else:
6261       _RedistributeAncillaryFiles(self, additional_nodes=[node],
6262                                   additional_vm=self.op.vm_capable)
6263       self.context.AddNode(new_node, self.proc.GetECId())
6264
6265
6266 class LUNodeSetParams(LogicalUnit):
6267   """Modifies the parameters of a node.
6268
6269   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6270       to the node role (as _ROLE_*)
6271   @cvar _R2F: a dictionary from node role to tuples of flags
6272   @cvar _FLAGS: a list of attribute names corresponding to the flags
6273
6274   """
6275   HPATH = "node-modify"
6276   HTYPE = constants.HTYPE_NODE
6277   REQ_BGL = False
6278   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6279   _F2R = {
6280     (True, False, False): _ROLE_CANDIDATE,
6281     (False, True, False): _ROLE_DRAINED,
6282     (False, False, True): _ROLE_OFFLINE,
6283     (False, False, False): _ROLE_REGULAR,
6284     }
6285   _R2F = dict((v, k) for k, v in _F2R.items())
6286   _FLAGS = ["master_candidate", "drained", "offline"]
6287
6288   def CheckArguments(self):
6289     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6290     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6291                 self.op.master_capable, self.op.vm_capable,
6292                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6293                 self.op.disk_state]
6294     if all_mods.count(None) == len(all_mods):
6295       raise errors.OpPrereqError("Please pass at least one modification",
6296                                  errors.ECODE_INVAL)
6297     if all_mods.count(True) > 1:
6298       raise errors.OpPrereqError("Can't set the node into more than one"
6299                                  " state at the same time",
6300                                  errors.ECODE_INVAL)
6301
6302     # Boolean value that tells us whether we might be demoting from MC
6303     self.might_demote = (self.op.master_candidate is False or
6304                          self.op.offline is True or
6305                          self.op.drained is True or
6306                          self.op.master_capable is False)
6307
6308     if self.op.secondary_ip:
6309       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6310         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6311                                    " address" % self.op.secondary_ip,
6312                                    errors.ECODE_INVAL)
6313
6314     self.lock_all = self.op.auto_promote and self.might_demote
6315     self.lock_instances = self.op.secondary_ip is not None
6316
6317   def _InstanceFilter(self, instance):
6318     """Filter for getting affected instances.
6319
6320     """
6321     return (instance.disk_template in constants.DTS_INT_MIRROR and
6322             self.op.node_name in instance.all_nodes)
6323
6324   def ExpandNames(self):
6325     if self.lock_all:
6326       self.needed_locks = {
6327         locking.LEVEL_NODE: locking.ALL_SET,
6328
6329         # Block allocations when all nodes are locked
6330         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6331         }
6332     else:
6333       self.needed_locks = {
6334         locking.LEVEL_NODE: self.op.node_name,
6335         }
6336
6337     # Since modifying a node can have severe effects on currently running
6338     # operations the resource lock is at least acquired in shared mode
6339     self.needed_locks[locking.LEVEL_NODE_RES] = \
6340       self.needed_locks[locking.LEVEL_NODE]
6341
6342     # Get all locks except nodes in shared mode; they are not used for anything
6343     # but read-only access
6344     self.share_locks = _ShareAll()
6345     self.share_locks[locking.LEVEL_NODE] = 0
6346     self.share_locks[locking.LEVEL_NODE_RES] = 0
6347     self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6348
6349     if self.lock_instances:
6350       self.needed_locks[locking.LEVEL_INSTANCE] = \
6351         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6352
6353   def BuildHooksEnv(self):
6354     """Build hooks env.
6355
6356     This runs on the master node.
6357
6358     """
6359     return {
6360       "OP_TARGET": self.op.node_name,
6361       "MASTER_CANDIDATE": str(self.op.master_candidate),
6362       "OFFLINE": str(self.op.offline),
6363       "DRAINED": str(self.op.drained),
6364       "MASTER_CAPABLE": str(self.op.master_capable),
6365       "VM_CAPABLE": str(self.op.vm_capable),
6366       }
6367
6368   def BuildHooksNodes(self):
6369     """Build hooks nodes.
6370
6371     """
6372     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6373     return (nl, nl)
6374
6375   def CheckPrereq(self):
6376     """Check prerequisites.
6377
6378     This only checks the instance list against the existing names.
6379
6380     """
6381     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6382
6383     if self.lock_instances:
6384       affected_instances = \
6385         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6386
6387       # Verify instance locks
6388       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6389       wanted_instances = frozenset(affected_instances.keys())
6390       if wanted_instances - owned_instances:
6391         raise errors.OpPrereqError("Instances affected by changing node %s's"
6392                                    " secondary IP address have changed since"
6393                                    " locks were acquired, wanted '%s', have"
6394                                    " '%s'; retry the operation" %
6395                                    (self.op.node_name,
6396                                     utils.CommaJoin(wanted_instances),
6397                                     utils.CommaJoin(owned_instances)),
6398                                    errors.ECODE_STATE)
6399     else:
6400       affected_instances = None
6401
6402     if (self.op.master_candidate is not None or
6403         self.op.drained is not None or
6404         self.op.offline is not None):
6405       # we can't change the master's node flags
6406       if self.op.node_name == self.cfg.GetMasterNode():
6407         raise errors.OpPrereqError("The master role can be changed"
6408                                    " only via master-failover",
6409                                    errors.ECODE_INVAL)
6410
6411     if self.op.master_candidate and not node.master_capable:
6412       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6413                                  " it a master candidate" % node.name,
6414                                  errors.ECODE_STATE)
6415
6416     if self.op.vm_capable is False:
6417       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6418       if ipri or isec:
6419         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6420                                    " the vm_capable flag" % node.name,
6421                                    errors.ECODE_STATE)
6422
6423     if node.master_candidate and self.might_demote and not self.lock_all:
6424       assert not self.op.auto_promote, "auto_promote set but lock_all not"
6425       # check if after removing the current node, we're missing master
6426       # candidates
6427       (mc_remaining, mc_should, _) = \
6428           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6429       if mc_remaining < mc_should:
6430         raise errors.OpPrereqError("Not enough master candidates, please"
6431                                    " pass auto promote option to allow"
6432                                    " promotion (--auto-promote or RAPI"
6433                                    " auto_promote=True)", errors.ECODE_STATE)
6434
6435     self.old_flags = old_flags = (node.master_candidate,
6436                                   node.drained, node.offline)
6437     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6438     self.old_role = old_role = self._F2R[old_flags]
6439
6440     # Check for ineffective changes
6441     for attr in self._FLAGS:
6442       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6443         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6444         setattr(self.op, attr, None)
6445
6446     # Past this point, any flag change to False means a transition
6447     # away from the respective state, as only real changes are kept
6448
6449     # TODO: We might query the real power state if it supports OOB
6450     if _SupportsOob(self.cfg, node):
6451       if self.op.offline is False and not (node.powered or
6452                                            self.op.powered is True):
6453         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6454                                     " offline status can be reset") %
6455                                    self.op.node_name, errors.ECODE_STATE)
6456     elif self.op.powered is not None:
6457       raise errors.OpPrereqError(("Unable to change powered state for node %s"
6458                                   " as it does not support out-of-band"
6459                                   " handling") % self.op.node_name,
6460                                  errors.ECODE_STATE)
6461
6462     # If we're being deofflined/drained, we'll MC ourself if needed
6463     if (self.op.drained is False or self.op.offline is False or
6464         (self.op.master_capable and not node.master_capable)):
6465       if _DecideSelfPromotion(self):
6466         self.op.master_candidate = True
6467         self.LogInfo("Auto-promoting node to master candidate")
6468
6469     # If we're no longer master capable, we'll demote ourselves from MC
6470     if self.op.master_capable is False and node.master_candidate:
6471       self.LogInfo("Demoting from master candidate")
6472       self.op.master_candidate = False
6473
6474     # Compute new role
6475     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6476     if self.op.master_candidate:
6477       new_role = self._ROLE_CANDIDATE
6478     elif self.op.drained:
6479       new_role = self._ROLE_DRAINED
6480     elif self.op.offline:
6481       new_role = self._ROLE_OFFLINE
6482     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6483       # False is still in new flags, which means we're un-setting (the
6484       # only) True flag
6485       new_role = self._ROLE_REGULAR
6486     else: # no new flags, nothing, keep old role
6487       new_role = old_role
6488
6489     self.new_role = new_role
6490
6491     if old_role == self._ROLE_OFFLINE and new_role != old_role:
6492       # Trying to transition out of offline status
6493       result = self.rpc.call_version([node.name])[node.name]
6494       if result.fail_msg:
6495         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6496                                    " to report its version: %s" %
6497                                    (node.name, result.fail_msg),
6498                                    errors.ECODE_STATE)
6499       else:
6500         self.LogWarning("Transitioning node from offline to online state"
6501                         " without using re-add. Please make sure the node"
6502                         " is healthy!")
6503
6504     # When changing the secondary ip, verify if this is a single-homed to
6505     # multi-homed transition or vice versa, and apply the relevant
6506     # restrictions.
6507     if self.op.secondary_ip:
6508       # Ok even without locking, because this can't be changed by any LU
6509       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6510       master_singlehomed = master.secondary_ip == master.primary_ip
6511       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6512         if self.op.force and node.name == master.name:
6513           self.LogWarning("Transitioning from single-homed to multi-homed"
6514                           " cluster; all nodes will require a secondary IP"
6515                           " address")
6516         else:
6517           raise errors.OpPrereqError("Changing the secondary ip on a"
6518                                      " single-homed cluster requires the"
6519                                      " --force option to be passed, and the"
6520                                      " target node to be the master",
6521                                      errors.ECODE_INVAL)
6522       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6523         if self.op.force and node.name == master.name:
6524           self.LogWarning("Transitioning from multi-homed to single-homed"
6525                           " cluster; secondary IP addresses will have to be"
6526                           " removed")
6527         else:
6528           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6529                                      " same as the primary IP on a multi-homed"
6530                                      " cluster, unless the --force option is"
6531                                      " passed, and the target node is the"
6532                                      " master", errors.ECODE_INVAL)
6533
6534       assert not (frozenset(affected_instances) -
6535                   self.owned_locks(locking.LEVEL_INSTANCE))
6536
6537       if node.offline:
6538         if affected_instances:
6539           msg = ("Cannot change secondary IP address: offline node has"
6540                  " instances (%s) configured to use it" %
6541                  utils.CommaJoin(affected_instances.keys()))
6542           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6543       else:
6544         # On online nodes, check that no instances are running, and that
6545         # the node has the new ip and we can reach it.
6546         for instance in affected_instances.values():
6547           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6548                               msg="cannot change secondary ip")
6549
6550         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6551         if master.name != node.name:
6552           # check reachability from master secondary ip to new secondary ip
6553           if not netutils.TcpPing(self.op.secondary_ip,
6554                                   constants.DEFAULT_NODED_PORT,
6555                                   source=master.secondary_ip):
6556             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6557                                        " based ping to node daemon port",
6558                                        errors.ECODE_ENVIRON)
6559
6560     if self.op.ndparams:
6561       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6562       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6563       self.new_ndparams = new_ndparams
6564
6565     if self.op.hv_state:
6566       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6567                                                  self.node.hv_state_static)
6568
6569     if self.op.disk_state:
6570       self.new_disk_state = \
6571         _MergeAndVerifyDiskState(self.op.disk_state,
6572                                  self.node.disk_state_static)
6573
6574   def Exec(self, feedback_fn):
6575     """Modifies a node.
6576
6577     """
6578     node = self.node
6579     old_role = self.old_role
6580     new_role = self.new_role
6581
6582     result = []
6583
6584     if self.op.ndparams:
6585       node.ndparams = self.new_ndparams
6586
6587     if self.op.powered is not None:
6588       node.powered = self.op.powered
6589
6590     if self.op.hv_state:
6591       node.hv_state_static = self.new_hv_state
6592
6593     if self.op.disk_state:
6594       node.disk_state_static = self.new_disk_state
6595
6596     for attr in ["master_capable", "vm_capable"]:
6597       val = getattr(self.op, attr)
6598       if val is not None:
6599         setattr(node, attr, val)
6600         result.append((attr, str(val)))
6601
6602     if new_role != old_role:
6603       # Tell the node to demote itself, if no longer MC and not offline
6604       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6605         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6606         if msg:
6607           self.LogWarning("Node failed to demote itself: %s", msg)
6608
6609       new_flags = self._R2F[new_role]
6610       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6611         if of != nf:
6612           result.append((desc, str(nf)))
6613       (node.master_candidate, node.drained, node.offline) = new_flags
6614
6615       # we locked all nodes, we adjust the CP before updating this node
6616       if self.lock_all:
6617         _AdjustCandidatePool(self, [node.name])
6618
6619     if self.op.secondary_ip:
6620       node.secondary_ip = self.op.secondary_ip
6621       result.append(("secondary_ip", self.op.secondary_ip))
6622
6623     # this will trigger configuration file update, if needed
6624     self.cfg.Update(node, feedback_fn)
6625
6626     # this will trigger job queue propagation or cleanup if the mc
6627     # flag changed
6628     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6629       self.context.ReaddNode(node)
6630
6631     return result
6632
6633
6634 class LUNodePowercycle(NoHooksLU):
6635   """Powercycles a node.
6636
6637   """
6638   REQ_BGL = False
6639
6640   def CheckArguments(self):
6641     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6642     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6643       raise errors.OpPrereqError("The node is the master and the force"
6644                                  " parameter was not set",
6645                                  errors.ECODE_INVAL)
6646
6647   def ExpandNames(self):
6648     """Locking for PowercycleNode.
6649
6650     This is a last-resort option and shouldn't block on other
6651     jobs. Therefore, we grab no locks.
6652
6653     """
6654     self.needed_locks = {}
6655
6656   def Exec(self, feedback_fn):
6657     """Reboots a node.
6658
6659     """
6660     result = self.rpc.call_node_powercycle(self.op.node_name,
6661                                            self.cfg.GetHypervisorType())
6662     result.Raise("Failed to schedule the reboot")
6663     return result.payload
6664
6665
6666 class LUClusterQuery(NoHooksLU):
6667   """Query cluster configuration.
6668
6669   """
6670   REQ_BGL = False
6671
6672   def ExpandNames(self):
6673     self.needed_locks = {}
6674
6675   def Exec(self, feedback_fn):
6676     """Return cluster config.
6677
6678     """
6679     cluster = self.cfg.GetClusterInfo()
6680     os_hvp = {}
6681
6682     # Filter just for enabled hypervisors
6683     for os_name, hv_dict in cluster.os_hvp.items():
6684       os_hvp[os_name] = {}
6685       for hv_name, hv_params in hv_dict.items():
6686         if hv_name in cluster.enabled_hypervisors:
6687           os_hvp[os_name][hv_name] = hv_params
6688
6689     # Convert ip_family to ip_version
6690     primary_ip_version = constants.IP4_VERSION
6691     if cluster.primary_ip_family == netutils.IP6Address.family:
6692       primary_ip_version = constants.IP6_VERSION
6693
6694     result = {
6695       "software_version": constants.RELEASE_VERSION,
6696       "protocol_version": constants.PROTOCOL_VERSION,
6697       "config_version": constants.CONFIG_VERSION,
6698       "os_api_version": max(constants.OS_API_VERSIONS),
6699       "export_version": constants.EXPORT_VERSION,
6700       "architecture": runtime.GetArchInfo(),
6701       "name": cluster.cluster_name,
6702       "master": cluster.master_node,
6703       "default_hypervisor": cluster.primary_hypervisor,
6704       "enabled_hypervisors": cluster.enabled_hypervisors,
6705       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6706                         for hypervisor_name in cluster.enabled_hypervisors]),
6707       "os_hvp": os_hvp,
6708       "beparams": cluster.beparams,
6709       "osparams": cluster.osparams,
6710       "ipolicy": cluster.ipolicy,
6711       "nicparams": cluster.nicparams,
6712       "ndparams": cluster.ndparams,
6713       "diskparams": cluster.diskparams,
6714       "candidate_pool_size": cluster.candidate_pool_size,
6715       "master_netdev": cluster.master_netdev,
6716       "master_netmask": cluster.master_netmask,
6717       "use_external_mip_script": cluster.use_external_mip_script,
6718       "volume_group_name": cluster.volume_group_name,
6719       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6720       "file_storage_dir": cluster.file_storage_dir,
6721       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6722       "maintain_node_health": cluster.maintain_node_health,
6723       "ctime": cluster.ctime,
6724       "mtime": cluster.mtime,
6725       "uuid": cluster.uuid,
6726       "tags": list(cluster.GetTags()),
6727       "uid_pool": cluster.uid_pool,
6728       "default_iallocator": cluster.default_iallocator,
6729       "reserved_lvs": cluster.reserved_lvs,
6730       "primary_ip_version": primary_ip_version,
6731       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6732       "hidden_os": cluster.hidden_os,
6733       "blacklisted_os": cluster.blacklisted_os,
6734       }
6735
6736     return result
6737
6738
6739 class LUClusterConfigQuery(NoHooksLU):
6740   """Return configuration values.
6741
6742   """
6743   REQ_BGL = False
6744
6745   def CheckArguments(self):
6746     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6747
6748   def ExpandNames(self):
6749     self.cq.ExpandNames(self)
6750
6751   def DeclareLocks(self, level):
6752     self.cq.DeclareLocks(self, level)
6753
6754   def Exec(self, feedback_fn):
6755     result = self.cq.OldStyleQuery(self)
6756
6757     assert len(result) == 1
6758
6759     return result[0]
6760
6761
6762 class _ClusterQuery(_QueryBase):
6763   FIELDS = query.CLUSTER_FIELDS
6764
6765   #: Do not sort (there is only one item)
6766   SORT_FIELD = None
6767
6768   def ExpandNames(self, lu):
6769     lu.needed_locks = {}
6770
6771     # The following variables interact with _QueryBase._GetNames
6772     self.wanted = locking.ALL_SET
6773     self.do_locking = self.use_locking
6774
6775     if self.do_locking:
6776       raise errors.OpPrereqError("Can not use locking for cluster queries",
6777                                  errors.ECODE_INVAL)
6778
6779   def DeclareLocks(self, lu, level):
6780     pass
6781
6782   def _GetQueryData(self, lu):
6783     """Computes the list of nodes and their attributes.
6784
6785     """
6786     # Locking is not used
6787     assert not (compat.any(lu.glm.is_owned(level)
6788                            for level in locking.LEVELS
6789                            if level != locking.LEVEL_CLUSTER) or
6790                 self.do_locking or self.use_locking)
6791
6792     if query.CQ_CONFIG in self.requested_data:
6793       cluster = lu.cfg.GetClusterInfo()
6794     else:
6795       cluster = NotImplemented
6796
6797     if query.CQ_QUEUE_DRAINED in self.requested_data:
6798       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6799     else:
6800       drain_flag = NotImplemented
6801
6802     if query.CQ_WATCHER_PAUSE in self.requested_data:
6803       master_name = lu.cfg.GetMasterNode()
6804
6805       result = lu.rpc.call_get_watcher_pause(master_name)
6806       result.Raise("Can't retrieve watcher pause from master node '%s'" %
6807                    master_name)
6808
6809       watcher_pause = result.payload
6810     else:
6811       watcher_pause = NotImplemented
6812
6813     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6814
6815
6816 class LUInstanceActivateDisks(NoHooksLU):
6817   """Bring up an instance's disks.
6818
6819   """
6820   REQ_BGL = False
6821
6822   def ExpandNames(self):
6823     self._ExpandAndLockInstance()
6824     self.needed_locks[locking.LEVEL_NODE] = []
6825     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6826
6827   def DeclareLocks(self, level):
6828     if level == locking.LEVEL_NODE:
6829       self._LockInstancesNodes()
6830
6831   def CheckPrereq(self):
6832     """Check prerequisites.
6833
6834     This checks that the instance is in the cluster.
6835
6836     """
6837     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6838     assert self.instance is not None, \
6839       "Cannot retrieve locked instance %s" % self.op.instance_name
6840     _CheckNodeOnline(self, self.instance.primary_node)
6841
6842   def Exec(self, feedback_fn):
6843     """Activate the disks.
6844
6845     """
6846     disks_ok, disks_info = \
6847               _AssembleInstanceDisks(self, self.instance,
6848                                      ignore_size=self.op.ignore_size)
6849     if not disks_ok:
6850       raise errors.OpExecError("Cannot activate block devices")
6851
6852     if self.op.wait_for_sync:
6853       if not _WaitForSync(self, self.instance):
6854         raise errors.OpExecError("Some disks of the instance are degraded!")
6855
6856     return disks_info
6857
6858
6859 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6860                            ignore_size=False):
6861   """Prepare the block devices for an instance.
6862
6863   This sets up the block devices on all nodes.
6864
6865   @type lu: L{LogicalUnit}
6866   @param lu: the logical unit on whose behalf we execute
6867   @type instance: L{objects.Instance}
6868   @param instance: the instance for whose disks we assemble
6869   @type disks: list of L{objects.Disk} or None
6870   @param disks: which disks to assemble (or all, if None)
6871   @type ignore_secondaries: boolean
6872   @param ignore_secondaries: if true, errors on secondary nodes
6873       won't result in an error return from the function
6874   @type ignore_size: boolean
6875   @param ignore_size: if true, the current known size of the disk
6876       will not be used during the disk activation, useful for cases
6877       when the size is wrong
6878   @return: False if the operation failed, otherwise a list of
6879       (host, instance_visible_name, node_visible_name)
6880       with the mapping from node devices to instance devices
6881
6882   """
6883   device_info = []
6884   disks_ok = True
6885   iname = instance.name
6886   disks = _ExpandCheckDisks(instance, disks)
6887
6888   # With the two passes mechanism we try to reduce the window of
6889   # opportunity for the race condition of switching DRBD to primary
6890   # before handshaking occured, but we do not eliminate it
6891
6892   # The proper fix would be to wait (with some limits) until the
6893   # connection has been made and drbd transitions from WFConnection
6894   # into any other network-connected state (Connected, SyncTarget,
6895   # SyncSource, etc.)
6896
6897   # 1st pass, assemble on all nodes in secondary mode
6898   for idx, inst_disk in enumerate(disks):
6899     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6900       if ignore_size:
6901         node_disk = node_disk.Copy()
6902         node_disk.UnsetSize()
6903       lu.cfg.SetDiskID(node_disk, node)
6904       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6905                                              False, idx)
6906       msg = result.fail_msg
6907       if msg:
6908         is_offline_secondary = (node in instance.secondary_nodes and
6909                                 result.offline)
6910         lu.LogWarning("Could not prepare block device %s on node %s"
6911                       " (is_primary=False, pass=1): %s",
6912                       inst_disk.iv_name, node, msg)
6913         if not (ignore_secondaries or is_offline_secondary):
6914           disks_ok = False
6915
6916   # FIXME: race condition on drbd migration to primary
6917
6918   # 2nd pass, do only the primary node
6919   for idx, inst_disk in enumerate(disks):
6920     dev_path = None
6921
6922     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6923       if node != instance.primary_node:
6924         continue
6925       if ignore_size:
6926         node_disk = node_disk.Copy()
6927         node_disk.UnsetSize()
6928       lu.cfg.SetDiskID(node_disk, node)
6929       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6930                                              True, idx)
6931       msg = result.fail_msg
6932       if msg:
6933         lu.LogWarning("Could not prepare block device %s on node %s"
6934                       " (is_primary=True, pass=2): %s",
6935                       inst_disk.iv_name, node, msg)
6936         disks_ok = False
6937       else:
6938         dev_path = result.payload
6939
6940     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6941
6942   # leave the disks configured for the primary node
6943   # this is a workaround that would be fixed better by
6944   # improving the logical/physical id handling
6945   for disk in disks:
6946     lu.cfg.SetDiskID(disk, instance.primary_node)
6947
6948   return disks_ok, device_info
6949
6950
6951 def _StartInstanceDisks(lu, instance, force):
6952   """Start the disks of an instance.
6953
6954   """
6955   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6956                                            ignore_secondaries=force)
6957   if not disks_ok:
6958     _ShutdownInstanceDisks(lu, instance)
6959     if force is not None and not force:
6960       lu.LogWarning("",
6961                     hint=("If the message above refers to a secondary node,"
6962                           " you can retry the operation using '--force'"))
6963     raise errors.OpExecError("Disk consistency error")
6964
6965
6966 class LUInstanceDeactivateDisks(NoHooksLU):
6967   """Shutdown an instance's disks.
6968
6969   """
6970   REQ_BGL = False
6971
6972   def ExpandNames(self):
6973     self._ExpandAndLockInstance()
6974     self.needed_locks[locking.LEVEL_NODE] = []
6975     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6976
6977   def DeclareLocks(self, level):
6978     if level == locking.LEVEL_NODE:
6979       self._LockInstancesNodes()
6980
6981   def CheckPrereq(self):
6982     """Check prerequisites.
6983
6984     This checks that the instance is in the cluster.
6985
6986     """
6987     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6988     assert self.instance is not None, \
6989       "Cannot retrieve locked instance %s" % self.op.instance_name
6990
6991   def Exec(self, feedback_fn):
6992     """Deactivate the disks
6993
6994     """
6995     instance = self.instance
6996     if self.op.force:
6997       _ShutdownInstanceDisks(self, instance)
6998     else:
6999       _SafeShutdownInstanceDisks(self, instance)
7000
7001
7002 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7003   """Shutdown block devices of an instance.
7004
7005   This function checks if an instance is running, before calling
7006   _ShutdownInstanceDisks.
7007
7008   """
7009   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7010   _ShutdownInstanceDisks(lu, instance, disks=disks)
7011
7012
7013 def _ExpandCheckDisks(instance, disks):
7014   """Return the instance disks selected by the disks list
7015
7016   @type disks: list of L{objects.Disk} or None
7017   @param disks: selected disks
7018   @rtype: list of L{objects.Disk}
7019   @return: selected instance disks to act on
7020
7021   """
7022   if disks is None:
7023     return instance.disks
7024   else:
7025     if not set(disks).issubset(instance.disks):
7026       raise errors.ProgrammerError("Can only act on disks belonging to the"
7027                                    " target instance")
7028     return disks
7029
7030
7031 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7032   """Shutdown block devices of an instance.
7033
7034   This does the shutdown on all nodes of the instance.
7035
7036   If the ignore_primary is false, errors on the primary node are
7037   ignored.
7038
7039   """
7040   all_result = True
7041   disks = _ExpandCheckDisks(instance, disks)
7042
7043   for disk in disks:
7044     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7045       lu.cfg.SetDiskID(top_disk, node)
7046       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7047       msg = result.fail_msg
7048       if msg:
7049         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7050                       disk.iv_name, node, msg)
7051         if ((node == instance.primary_node and not ignore_primary) or
7052             (node != instance.primary_node and not result.offline)):
7053           all_result = False
7054   return all_result
7055
7056
7057 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7058   """Checks if a node has enough free memory.
7059
7060   This function checks if a given node has the needed amount of free
7061   memory. In case the node has less memory or we cannot get the
7062   information from the node, this function raises an OpPrereqError
7063   exception.
7064
7065   @type lu: C{LogicalUnit}
7066   @param lu: a logical unit from which we get configuration data
7067   @type node: C{str}
7068   @param node: the node to check
7069   @type reason: C{str}
7070   @param reason: string to use in the error message
7071   @type requested: C{int}
7072   @param requested: the amount of memory in MiB to check for
7073   @type hypervisor_name: C{str}
7074   @param hypervisor_name: the hypervisor to ask for memory stats
7075   @rtype: integer
7076   @return: node current free memory
7077   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7078       we cannot check the node
7079
7080   """
7081   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7082   nodeinfo[node].Raise("Can't get data from node %s" % node,
7083                        prereq=True, ecode=errors.ECODE_ENVIRON)
7084   (_, _, (hv_info, )) = nodeinfo[node].payload
7085
7086   free_mem = hv_info.get("memory_free", None)
7087   if not isinstance(free_mem, int):
7088     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7089                                " was '%s'" % (node, free_mem),
7090                                errors.ECODE_ENVIRON)
7091   if requested > free_mem:
7092     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7093                                " needed %s MiB, available %s MiB" %
7094                                (node, reason, requested, free_mem),
7095                                errors.ECODE_NORES)
7096   return free_mem
7097
7098
7099 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7100   """Checks if nodes have enough free disk space in all the VGs.
7101
7102   This function checks if all given nodes have the needed amount of
7103   free disk. In case any node has less disk or we cannot get the
7104   information from the node, this function raises an OpPrereqError
7105   exception.
7106
7107   @type lu: C{LogicalUnit}
7108   @param lu: a logical unit from which we get configuration data
7109   @type nodenames: C{list}
7110   @param nodenames: the list of node names to check
7111   @type req_sizes: C{dict}
7112   @param req_sizes: the hash of vg and corresponding amount of disk in
7113       MiB to check for
7114   @raise errors.OpPrereqError: if the node doesn't have enough disk,
7115       or we cannot check the node
7116
7117   """
7118   for vg, req_size in req_sizes.items():
7119     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7120
7121
7122 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7123   """Checks if nodes have enough free disk space in the specified VG.
7124
7125   This function checks if all given nodes have the needed amount of
7126   free disk. In case any node has less disk or we cannot get the
7127   information from the node, this function raises an OpPrereqError
7128   exception.
7129
7130   @type lu: C{LogicalUnit}
7131   @param lu: a logical unit from which we get configuration data
7132   @type nodenames: C{list}
7133   @param nodenames: the list of node names to check
7134   @type vg: C{str}
7135   @param vg: the volume group to check
7136   @type requested: C{int}
7137   @param requested: the amount of disk in MiB to check for
7138   @raise errors.OpPrereqError: if the node doesn't have enough disk,
7139       or we cannot check the node
7140
7141   """
7142   es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7143   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7144   for node in nodenames:
7145     info = nodeinfo[node]
7146     info.Raise("Cannot get current information from node %s" % node,
7147                prereq=True, ecode=errors.ECODE_ENVIRON)
7148     (_, (vg_info, ), _) = info.payload
7149     vg_free = vg_info.get("vg_free", None)
7150     if not isinstance(vg_free, int):
7151       raise errors.OpPrereqError("Can't compute free disk space on node"
7152                                  " %s for vg %s, result was '%s'" %
7153                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
7154     if requested > vg_free:
7155       raise errors.OpPrereqError("Not enough disk space on target node %s"
7156                                  " vg %s: required %d MiB, available %d MiB" %
7157                                  (node, vg, requested, vg_free),
7158                                  errors.ECODE_NORES)
7159
7160
7161 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7162   """Checks if nodes have enough physical CPUs
7163
7164   This function checks if all given nodes have the needed number of
7165   physical CPUs. In case any node has less CPUs or we cannot get the
7166   information from the node, this function raises an OpPrereqError
7167   exception.
7168
7169   @type lu: C{LogicalUnit}
7170   @param lu: a logical unit from which we get configuration data
7171   @type nodenames: C{list}
7172   @param nodenames: the list of node names to check
7173   @type requested: C{int}
7174   @param requested: the minimum acceptable number of physical CPUs
7175   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7176       or we cannot check the node
7177
7178   """
7179   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7180   for node in nodenames:
7181     info = nodeinfo[node]
7182     info.Raise("Cannot get current information from node %s" % node,
7183                prereq=True, ecode=errors.ECODE_ENVIRON)
7184     (_, _, (hv_info, )) = info.payload
7185     num_cpus = hv_info.get("cpu_total", None)
7186     if not isinstance(num_cpus, int):
7187       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7188                                  " on node %s, result was '%s'" %
7189                                  (node, num_cpus), errors.ECODE_ENVIRON)
7190     if requested > num_cpus:
7191       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7192                                  "required" % (node, num_cpus, requested),
7193                                  errors.ECODE_NORES)
7194
7195
7196 class LUInstanceStartup(LogicalUnit):
7197   """Starts an instance.
7198
7199   """
7200   HPATH = "instance-start"
7201   HTYPE = constants.HTYPE_INSTANCE
7202   REQ_BGL = False
7203
7204   def CheckArguments(self):
7205     # extra beparams
7206     if self.op.beparams:
7207       # fill the beparams dict
7208       objects.UpgradeBeParams(self.op.beparams)
7209       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7210
7211   def ExpandNames(self):
7212     self._ExpandAndLockInstance()
7213     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7214
7215   def DeclareLocks(self, level):
7216     if level == locking.LEVEL_NODE_RES:
7217       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7218
7219   def BuildHooksEnv(self):
7220     """Build hooks env.
7221
7222     This runs on master, primary and secondary nodes of the instance.
7223
7224     """
7225     env = {
7226       "FORCE": self.op.force,
7227       }
7228
7229     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7230
7231     return env
7232
7233   def BuildHooksNodes(self):
7234     """Build hooks nodes.
7235
7236     """
7237     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7238     return (nl, nl)
7239
7240   def CheckPrereq(self):
7241     """Check prerequisites.
7242
7243     This checks that the instance is in the cluster.
7244
7245     """
7246     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7247     assert self.instance is not None, \
7248       "Cannot retrieve locked instance %s" % self.op.instance_name
7249
7250     # extra hvparams
7251     if self.op.hvparams:
7252       # check hypervisor parameter syntax (locally)
7253       cluster = self.cfg.GetClusterInfo()
7254       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7255       filled_hvp = cluster.FillHV(instance)
7256       filled_hvp.update(self.op.hvparams)
7257       hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7258       hv_type.CheckParameterSyntax(filled_hvp)
7259       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7260
7261     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7262
7263     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7264
7265     if self.primary_offline and self.op.ignore_offline_nodes:
7266       self.LogWarning("Ignoring offline primary node")
7267
7268       if self.op.hvparams or self.op.beparams:
7269         self.LogWarning("Overridden parameters are ignored")
7270     else:
7271       _CheckNodeOnline(self, instance.primary_node)
7272
7273       bep = self.cfg.GetClusterInfo().FillBE(instance)
7274       bep.update(self.op.beparams)
7275
7276       # check bridges existence
7277       _CheckInstanceBridgesExist(self, instance)
7278
7279       remote_info = self.rpc.call_instance_info(instance.primary_node,
7280                                                 instance.name,
7281                                                 instance.hypervisor)
7282       remote_info.Raise("Error checking node %s" % instance.primary_node,
7283                         prereq=True, ecode=errors.ECODE_ENVIRON)
7284       if not remote_info.payload: # not running already
7285         _CheckNodeFreeMemory(self, instance.primary_node,
7286                              "starting instance %s" % instance.name,
7287                              bep[constants.BE_MINMEM], instance.hypervisor)
7288
7289   def Exec(self, feedback_fn):
7290     """Start the instance.
7291
7292     """
7293     instance = self.instance
7294     force = self.op.force
7295
7296     if not self.op.no_remember:
7297       self.cfg.MarkInstanceUp(instance.name)
7298
7299     if self.primary_offline:
7300       assert self.op.ignore_offline_nodes
7301       self.LogInfo("Primary node offline, marked instance as started")
7302     else:
7303       node_current = instance.primary_node
7304
7305       _StartInstanceDisks(self, instance, force)
7306
7307       result = \
7308         self.rpc.call_instance_start(node_current,
7309                                      (instance, self.op.hvparams,
7310                                       self.op.beparams),
7311                                      self.op.startup_paused)
7312       msg = result.fail_msg
7313       if msg:
7314         _ShutdownInstanceDisks(self, instance)
7315         raise errors.OpExecError("Could not start instance: %s" % msg)
7316
7317
7318 class LUInstanceReboot(LogicalUnit):
7319   """Reboot an instance.
7320
7321   """
7322   HPATH = "instance-reboot"
7323   HTYPE = constants.HTYPE_INSTANCE
7324   REQ_BGL = False
7325
7326   def ExpandNames(self):
7327     self._ExpandAndLockInstance()
7328
7329   def BuildHooksEnv(self):
7330     """Build hooks env.
7331
7332     This runs on master, primary and secondary nodes of the instance.
7333
7334     """
7335     env = {
7336       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7337       "REBOOT_TYPE": self.op.reboot_type,
7338       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7339       }
7340
7341     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7342
7343     return env
7344
7345   def BuildHooksNodes(self):
7346     """Build hooks nodes.
7347
7348     """
7349     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7350     return (nl, nl)
7351
7352   def CheckPrereq(self):
7353     """Check prerequisites.
7354
7355     This checks that the instance is in the cluster.
7356
7357     """
7358     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7359     assert self.instance is not None, \
7360       "Cannot retrieve locked instance %s" % self.op.instance_name
7361     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7362     _CheckNodeOnline(self, instance.primary_node)
7363
7364     # check bridges existence
7365     _CheckInstanceBridgesExist(self, instance)
7366
7367   def Exec(self, feedback_fn):
7368     """Reboot the instance.
7369
7370     """
7371     instance = self.instance
7372     ignore_secondaries = self.op.ignore_secondaries
7373     reboot_type = self.op.reboot_type
7374
7375     remote_info = self.rpc.call_instance_info(instance.primary_node,
7376                                               instance.name,
7377                                               instance.hypervisor)
7378     remote_info.Raise("Error checking node %s" % instance.primary_node)
7379     instance_running = bool(remote_info.payload)
7380
7381     node_current = instance.primary_node
7382
7383     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7384                                             constants.INSTANCE_REBOOT_HARD]:
7385       for disk in instance.disks:
7386         self.cfg.SetDiskID(disk, node_current)
7387       result = self.rpc.call_instance_reboot(node_current, instance,
7388                                              reboot_type,
7389                                              self.op.shutdown_timeout)
7390       result.Raise("Could not reboot instance")
7391     else:
7392       if instance_running:
7393         result = self.rpc.call_instance_shutdown(node_current, instance,
7394                                                  self.op.shutdown_timeout)
7395         result.Raise("Could not shutdown instance for full reboot")
7396         _ShutdownInstanceDisks(self, instance)
7397       else:
7398         self.LogInfo("Instance %s was already stopped, starting now",
7399                      instance.name)
7400       _StartInstanceDisks(self, instance, ignore_secondaries)
7401       result = self.rpc.call_instance_start(node_current,
7402                                             (instance, None, None), False)
7403       msg = result.fail_msg
7404       if msg:
7405         _ShutdownInstanceDisks(self, instance)
7406         raise errors.OpExecError("Could not start instance for"
7407                                  " full reboot: %s" % msg)
7408
7409     self.cfg.MarkInstanceUp(instance.name)
7410
7411
7412 class LUInstanceShutdown(LogicalUnit):
7413   """Shutdown an instance.
7414
7415   """
7416   HPATH = "instance-stop"
7417   HTYPE = constants.HTYPE_INSTANCE
7418   REQ_BGL = False
7419
7420   def ExpandNames(self):
7421     self._ExpandAndLockInstance()
7422
7423   def BuildHooksEnv(self):
7424     """Build hooks env.
7425
7426     This runs on master, primary and secondary nodes of the instance.
7427
7428     """
7429     env = _BuildInstanceHookEnvByObject(self, self.instance)
7430     env["TIMEOUT"] = self.op.timeout
7431     return env
7432
7433   def BuildHooksNodes(self):
7434     """Build hooks nodes.
7435
7436     """
7437     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7438     return (nl, nl)
7439
7440   def CheckPrereq(self):
7441     """Check prerequisites.
7442
7443     This checks that the instance is in the cluster.
7444
7445     """
7446     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7447     assert self.instance is not None, \
7448       "Cannot retrieve locked instance %s" % self.op.instance_name
7449
7450     if not self.op.force:
7451       _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7452     else:
7453       self.LogWarning("Ignoring offline instance check")
7454
7455     self.primary_offline = \
7456       self.cfg.GetNodeInfo(self.instance.primary_node).offline
7457
7458     if self.primary_offline and self.op.ignore_offline_nodes:
7459       self.LogWarning("Ignoring offline primary node")
7460     else:
7461       _CheckNodeOnline(self, self.instance.primary_node)
7462
7463   def Exec(self, feedback_fn):
7464     """Shutdown the instance.
7465
7466     """
7467     instance = self.instance
7468     node_current = instance.primary_node
7469     timeout = self.op.timeout
7470
7471     # If the instance is offline we shouldn't mark it as down, as that
7472     # resets the offline flag.
7473     if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7474       self.cfg.MarkInstanceDown(instance.name)
7475
7476     if self.primary_offline:
7477       assert self.op.ignore_offline_nodes
7478       self.LogInfo("Primary node offline, marked instance as stopped")
7479     else:
7480       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7481       msg = result.fail_msg
7482       if msg:
7483         self.LogWarning("Could not shutdown instance: %s", msg)
7484
7485       _ShutdownInstanceDisks(self, instance)
7486
7487
7488 class LUInstanceReinstall(LogicalUnit):
7489   """Reinstall an instance.
7490
7491   """
7492   HPATH = "instance-reinstall"
7493   HTYPE = constants.HTYPE_INSTANCE
7494   REQ_BGL = False
7495
7496   def ExpandNames(self):
7497     self._ExpandAndLockInstance()
7498
7499   def BuildHooksEnv(self):
7500     """Build hooks env.
7501
7502     This runs on master, primary and secondary nodes of the instance.
7503
7504     """
7505     return _BuildInstanceHookEnvByObject(self, self.instance)
7506
7507   def BuildHooksNodes(self):
7508     """Build hooks nodes.
7509
7510     """
7511     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7512     return (nl, nl)
7513
7514   def CheckPrereq(self):
7515     """Check prerequisites.
7516
7517     This checks that the instance is in the cluster and is not running.
7518
7519     """
7520     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7521     assert instance is not None, \
7522       "Cannot retrieve locked instance %s" % self.op.instance_name
7523     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7524                      " offline, cannot reinstall")
7525
7526     if instance.disk_template == constants.DT_DISKLESS:
7527       raise errors.OpPrereqError("Instance '%s' has no disks" %
7528                                  self.op.instance_name,
7529                                  errors.ECODE_INVAL)
7530     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7531
7532     if self.op.os_type is not None:
7533       # OS verification
7534       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7535       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7536       instance_os = self.op.os_type
7537     else:
7538       instance_os = instance.os
7539
7540     nodelist = list(instance.all_nodes)
7541
7542     if self.op.osparams:
7543       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7544       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7545       self.os_inst = i_osdict # the new dict (without defaults)
7546     else:
7547       self.os_inst = None
7548
7549     self.instance = instance
7550
7551   def Exec(self, feedback_fn):
7552     """Reinstall the instance.
7553
7554     """
7555     inst = self.instance
7556
7557     if self.op.os_type is not None:
7558       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7559       inst.os = self.op.os_type
7560       # Write to configuration
7561       self.cfg.Update(inst, feedback_fn)
7562
7563     _StartInstanceDisks(self, inst, None)
7564     try:
7565       feedback_fn("Running the instance OS create scripts...")
7566       # FIXME: pass debug option from opcode to backend
7567       result = self.rpc.call_instance_os_add(inst.primary_node,
7568                                              (inst, self.os_inst), True,
7569                                              self.op.debug_level)
7570       result.Raise("Could not install OS for instance %s on node %s" %
7571                    (inst.name, inst.primary_node))
7572     finally:
7573       _ShutdownInstanceDisks(self, inst)
7574
7575
7576 class LUInstanceRecreateDisks(LogicalUnit):
7577   """Recreate an instance's missing disks.
7578
7579   """
7580   HPATH = "instance-recreate-disks"
7581   HTYPE = constants.HTYPE_INSTANCE
7582   REQ_BGL = False
7583
7584   _MODIFYABLE = compat.UniqueFrozenset([
7585     constants.IDISK_SIZE,
7586     constants.IDISK_MODE,
7587     ])
7588
7589   # New or changed disk parameters may have different semantics
7590   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7591     constants.IDISK_ADOPT,
7592
7593     # TODO: Implement support changing VG while recreating
7594     constants.IDISK_VG,
7595     constants.IDISK_METAVG,
7596     constants.IDISK_PROVIDER,
7597     ]))
7598
7599   def _RunAllocator(self):
7600     """Run the allocator based on input opcode.
7601
7602     """
7603     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7604
7605     # FIXME
7606     # The allocator should actually run in "relocate" mode, but current
7607     # allocators don't support relocating all the nodes of an instance at
7608     # the same time. As a workaround we use "allocate" mode, but this is
7609     # suboptimal for two reasons:
7610     # - The instance name passed to the allocator is present in the list of
7611     #   existing instances, so there could be a conflict within the
7612     #   internal structures of the allocator. This doesn't happen with the
7613     #   current allocators, but it's a liability.
7614     # - The allocator counts the resources used by the instance twice: once
7615     #   because the instance exists already, and once because it tries to
7616     #   allocate a new instance.
7617     # The allocator could choose some of the nodes on which the instance is
7618     # running, but that's not a problem. If the instance nodes are broken,
7619     # they should be already be marked as drained or offline, and hence
7620     # skipped by the allocator. If instance disks have been lost for other
7621     # reasons, then recreating the disks on the same nodes should be fine.
7622     disk_template = self.instance.disk_template
7623     spindle_use = be_full[constants.BE_SPINDLE_USE]
7624     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7625                                         disk_template=disk_template,
7626                                         tags=list(self.instance.GetTags()),
7627                                         os=self.instance.os,
7628                                         nics=[{}],
7629                                         vcpus=be_full[constants.BE_VCPUS],
7630                                         memory=be_full[constants.BE_MAXMEM],
7631                                         spindle_use=spindle_use,
7632                                         disks=[{constants.IDISK_SIZE: d.size,
7633                                                 constants.IDISK_MODE: d.mode}
7634                                                 for d in self.instance.disks],
7635                                         hypervisor=self.instance.hypervisor,
7636                                         node_whitelist=None)
7637     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7638
7639     ial.Run(self.op.iallocator)
7640
7641     assert req.RequiredNodes() == len(self.instance.all_nodes)
7642
7643     if not ial.success:
7644       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7645                                  " %s" % (self.op.iallocator, ial.info),
7646                                  errors.ECODE_NORES)
7647
7648     self.op.nodes = ial.result
7649     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7650                  self.op.instance_name, self.op.iallocator,
7651                  utils.CommaJoin(ial.result))
7652
7653   def CheckArguments(self):
7654     if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7655       # Normalize and convert deprecated list of disk indices
7656       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7657
7658     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7659     if duplicates:
7660       raise errors.OpPrereqError("Some disks have been specified more than"
7661                                  " once: %s" % utils.CommaJoin(duplicates),
7662                                  errors.ECODE_INVAL)
7663
7664     # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7665     # when neither iallocator nor nodes are specified
7666     if self.op.iallocator or self.op.nodes:
7667       _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7668
7669     for (idx, params) in self.op.disks:
7670       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7671       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7672       if unsupported:
7673         raise errors.OpPrereqError("Parameters for disk %s try to change"
7674                                    " unmodifyable parameter(s): %s" %
7675                                    (idx, utils.CommaJoin(unsupported)),
7676                                    errors.ECODE_INVAL)
7677
7678   def ExpandNames(self):
7679     self._ExpandAndLockInstance()
7680     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7681
7682     if self.op.nodes:
7683       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7684       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7685     else:
7686       self.needed_locks[locking.LEVEL_NODE] = []
7687       if self.op.iallocator:
7688         # iallocator will select a new node in the same group
7689         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7690         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7691
7692     self.needed_locks[locking.LEVEL_NODE_RES] = []
7693
7694   def DeclareLocks(self, level):
7695     if level == locking.LEVEL_NODEGROUP:
7696       assert self.op.iallocator is not None
7697       assert not self.op.nodes
7698       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7699       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7700       # Lock the primary group used by the instance optimistically; this
7701       # requires going via the node before it's locked, requiring
7702       # verification later on
7703       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7704         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7705
7706     elif level == locking.LEVEL_NODE:
7707       # If an allocator is used, then we lock all the nodes in the current
7708       # instance group, as we don't know yet which ones will be selected;
7709       # if we replace the nodes without using an allocator, locks are
7710       # already declared in ExpandNames; otherwise, we need to lock all the
7711       # instance nodes for disk re-creation
7712       if self.op.iallocator:
7713         assert not self.op.nodes
7714         assert not self.needed_locks[locking.LEVEL_NODE]
7715         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7716
7717         # Lock member nodes of the group of the primary node
7718         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7719           self.needed_locks[locking.LEVEL_NODE].extend(
7720             self.cfg.GetNodeGroup(group_uuid).members)
7721
7722         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7723       elif not self.op.nodes:
7724         self._LockInstancesNodes(primary_only=False)
7725     elif level == locking.LEVEL_NODE_RES:
7726       # Copy node locks
7727       self.needed_locks[locking.LEVEL_NODE_RES] = \
7728         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7729
7730   def BuildHooksEnv(self):
7731     """Build hooks env.
7732
7733     This runs on master, primary and secondary nodes of the instance.
7734
7735     """
7736     return _BuildInstanceHookEnvByObject(self, self.instance)
7737
7738   def BuildHooksNodes(self):
7739     """Build hooks nodes.
7740
7741     """
7742     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7743     return (nl, nl)
7744
7745   def CheckPrereq(self):
7746     """Check prerequisites.
7747
7748     This checks that the instance is in the cluster and is not running.
7749
7750     """
7751     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7752     assert instance is not None, \
7753       "Cannot retrieve locked instance %s" % self.op.instance_name
7754     if self.op.nodes:
7755       if len(self.op.nodes) != len(instance.all_nodes):
7756         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7757                                    " %d replacement nodes were specified" %
7758                                    (instance.name, len(instance.all_nodes),
7759                                     len(self.op.nodes)),
7760                                    errors.ECODE_INVAL)
7761       assert instance.disk_template != constants.DT_DRBD8 or \
7762           len(self.op.nodes) == 2
7763       assert instance.disk_template != constants.DT_PLAIN or \
7764           len(self.op.nodes) == 1
7765       primary_node = self.op.nodes[0]
7766     else:
7767       primary_node = instance.primary_node
7768     if not self.op.iallocator:
7769       _CheckNodeOnline(self, primary_node)
7770
7771     if instance.disk_template == constants.DT_DISKLESS:
7772       raise errors.OpPrereqError("Instance '%s' has no disks" %
7773                                  self.op.instance_name, errors.ECODE_INVAL)
7774
7775     # Verify if node group locks are still correct
7776     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7777     if owned_groups:
7778       # Node group locks are acquired only for the primary node (and only
7779       # when the allocator is used)
7780       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7781                                primary_only=True)
7782
7783     # if we replace nodes *and* the old primary is offline, we don't
7784     # check the instance state
7785     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7786     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7787       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7788                           msg="cannot recreate disks")
7789
7790     if self.op.disks:
7791       self.disks = dict(self.op.disks)
7792     else:
7793       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7794
7795     maxidx = max(self.disks.keys())
7796     if maxidx >= len(instance.disks):
7797       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7798                                  errors.ECODE_INVAL)
7799
7800     if ((self.op.nodes or self.op.iallocator) and
7801         sorted(self.disks.keys()) != range(len(instance.disks))):
7802       raise errors.OpPrereqError("Can't recreate disks partially and"
7803                                  " change the nodes at the same time",
7804                                  errors.ECODE_INVAL)
7805
7806     self.instance = instance
7807
7808     if self.op.iallocator:
7809       self._RunAllocator()
7810       # Release unneeded node and node resource locks
7811       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7812       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7813       _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7814
7815     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7816
7817   def Exec(self, feedback_fn):
7818     """Recreate the disks.
7819
7820     """
7821     instance = self.instance
7822
7823     assert (self.owned_locks(locking.LEVEL_NODE) ==
7824             self.owned_locks(locking.LEVEL_NODE_RES))
7825
7826     to_skip = []
7827     mods = [] # keeps track of needed changes
7828
7829     for idx, disk in enumerate(instance.disks):
7830       try:
7831         changes = self.disks[idx]
7832       except KeyError:
7833         # Disk should not be recreated
7834         to_skip.append(idx)
7835         continue
7836
7837       # update secondaries for disks, if needed
7838       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7839         # need to update the nodes and minors
7840         assert len(self.op.nodes) == 2
7841         assert len(disk.logical_id) == 6 # otherwise disk internals
7842                                          # have changed
7843         (_, _, old_port, _, _, old_secret) = disk.logical_id
7844         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7845         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7846                   new_minors[0], new_minors[1], old_secret)
7847         assert len(disk.logical_id) == len(new_id)
7848       else:
7849         new_id = None
7850
7851       mods.append((idx, new_id, changes))
7852
7853     # now that we have passed all asserts above, we can apply the mods
7854     # in a single run (to avoid partial changes)
7855     for idx, new_id, changes in mods:
7856       disk = instance.disks[idx]
7857       if new_id is not None:
7858         assert disk.dev_type == constants.LD_DRBD8
7859         disk.logical_id = new_id
7860       if changes:
7861         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7862                     mode=changes.get(constants.IDISK_MODE, None))
7863
7864     # change primary node, if needed
7865     if self.op.nodes:
7866       instance.primary_node = self.op.nodes[0]
7867       self.LogWarning("Changing the instance's nodes, you will have to"
7868                       " remove any disks left on the older nodes manually")
7869
7870     if self.op.nodes:
7871       self.cfg.Update(instance, feedback_fn)
7872
7873     # All touched nodes must be locked
7874     mylocks = self.owned_locks(locking.LEVEL_NODE)
7875     assert mylocks.issuperset(frozenset(instance.all_nodes))
7876     _CreateDisks(self, instance, to_skip=to_skip)
7877
7878
7879 class LUInstanceRename(LogicalUnit):
7880   """Rename an instance.
7881
7882   """
7883   HPATH = "instance-rename"
7884   HTYPE = constants.HTYPE_INSTANCE
7885
7886   def CheckArguments(self):
7887     """Check arguments.
7888
7889     """
7890     if self.op.ip_check and not self.op.name_check:
7891       # TODO: make the ip check more flexible and not depend on the name check
7892       raise errors.OpPrereqError("IP address check requires a name check",
7893                                  errors.ECODE_INVAL)
7894
7895   def BuildHooksEnv(self):
7896     """Build hooks env.
7897
7898     This runs on master, primary and secondary nodes of the instance.
7899
7900     """
7901     env = _BuildInstanceHookEnvByObject(self, self.instance)
7902     env["INSTANCE_NEW_NAME"] = self.op.new_name
7903     return env
7904
7905   def BuildHooksNodes(self):
7906     """Build hooks nodes.
7907
7908     """
7909     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7910     return (nl, nl)
7911
7912   def CheckPrereq(self):
7913     """Check prerequisites.
7914
7915     This checks that the instance is in the cluster and is not running.
7916
7917     """
7918     self.op.instance_name = _ExpandInstanceName(self.cfg,
7919                                                 self.op.instance_name)
7920     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7921     assert instance is not None
7922     _CheckNodeOnline(self, instance.primary_node)
7923     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7924                         msg="cannot rename")
7925     self.instance = instance
7926
7927     new_name = self.op.new_name
7928     if self.op.name_check:
7929       hostname = _CheckHostnameSane(self, new_name)
7930       new_name = self.op.new_name = hostname.name
7931       if (self.op.ip_check and
7932           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7933         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7934                                    (hostname.ip, new_name),
7935                                    errors.ECODE_NOTUNIQUE)
7936
7937     instance_list = self.cfg.GetInstanceList()
7938     if new_name in instance_list and new_name != instance.name:
7939       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7940                                  new_name, errors.ECODE_EXISTS)
7941
7942   def Exec(self, feedback_fn):
7943     """Rename the instance.
7944
7945     """
7946     inst = self.instance
7947     old_name = inst.name
7948
7949     rename_file_storage = False
7950     if (inst.disk_template in constants.DTS_FILEBASED and
7951         self.op.new_name != inst.name):
7952       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7953       rename_file_storage = True
7954
7955     self.cfg.RenameInstance(inst.name, self.op.new_name)
7956     # Change the instance lock. This is definitely safe while we hold the BGL.
7957     # Otherwise the new lock would have to be added in acquired mode.
7958     assert self.REQ_BGL
7959     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7960     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7961     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7962
7963     # re-read the instance from the configuration after rename
7964     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7965
7966     if rename_file_storage:
7967       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7968       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7969                                                      old_file_storage_dir,
7970                                                      new_file_storage_dir)
7971       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7972                    " (but the instance has been renamed in Ganeti)" %
7973                    (inst.primary_node, old_file_storage_dir,
7974                     new_file_storage_dir))
7975
7976     _StartInstanceDisks(self, inst, None)
7977     # update info on disks
7978     info = _GetInstanceInfoText(inst)
7979     for (idx, disk) in enumerate(inst.disks):
7980       for node in inst.all_nodes:
7981         self.cfg.SetDiskID(disk, node)
7982         result = self.rpc.call_blockdev_setinfo(node, disk, info)
7983         if result.fail_msg:
7984           self.LogWarning("Error setting info on node %s for disk %s: %s",
7985                           node, idx, result.fail_msg)
7986     try:
7987       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7988                                                  old_name, self.op.debug_level)
7989       msg = result.fail_msg
7990       if msg:
7991         msg = ("Could not run OS rename script for instance %s on node %s"
7992                " (but the instance has been renamed in Ganeti): %s" %
7993                (inst.name, inst.primary_node, msg))
7994         self.LogWarning(msg)
7995     finally:
7996       _ShutdownInstanceDisks(self, inst)
7997
7998     return inst.name
7999
8000
8001 class LUInstanceRemove(LogicalUnit):
8002   """Remove an instance.
8003
8004   """
8005   HPATH = "instance-remove"
8006   HTYPE = constants.HTYPE_INSTANCE
8007   REQ_BGL = False
8008
8009   def ExpandNames(self):
8010     self._ExpandAndLockInstance()
8011     self.needed_locks[locking.LEVEL_NODE] = []
8012     self.needed_locks[locking.LEVEL_NODE_RES] = []
8013     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8014
8015   def DeclareLocks(self, level):
8016     if level == locking.LEVEL_NODE:
8017       self._LockInstancesNodes()
8018     elif level == locking.LEVEL_NODE_RES:
8019       # Copy node locks
8020       self.needed_locks[locking.LEVEL_NODE_RES] = \
8021         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8022
8023   def BuildHooksEnv(self):
8024     """Build hooks env.
8025
8026     This runs on master, primary and secondary nodes of the instance.
8027
8028     """
8029     env = _BuildInstanceHookEnvByObject(self, self.instance)
8030     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8031     return env
8032
8033   def BuildHooksNodes(self):
8034     """Build hooks nodes.
8035
8036     """
8037     nl = [self.cfg.GetMasterNode()]
8038     nl_post = list(self.instance.all_nodes) + nl
8039     return (nl, nl_post)
8040
8041   def CheckPrereq(self):
8042     """Check prerequisites.
8043
8044     This checks that the instance is in the cluster.
8045
8046     """
8047     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8048     assert self.instance is not None, \
8049       "Cannot retrieve locked instance %s" % self.op.instance_name
8050
8051   def Exec(self, feedback_fn):
8052     """Remove the instance.
8053
8054     """
8055     instance = self.instance
8056     logging.info("Shutting down instance %s on node %s",
8057                  instance.name, instance.primary_node)
8058
8059     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8060                                              self.op.shutdown_timeout)
8061     msg = result.fail_msg
8062     if msg:
8063       if self.op.ignore_failures:
8064         feedback_fn("Warning: can't shutdown instance: %s" % msg)
8065       else:
8066         raise errors.OpExecError("Could not shutdown instance %s on"
8067                                  " node %s: %s" %
8068                                  (instance.name, instance.primary_node, msg))
8069
8070     assert (self.owned_locks(locking.LEVEL_NODE) ==
8071             self.owned_locks(locking.LEVEL_NODE_RES))
8072     assert not (set(instance.all_nodes) -
8073                 self.owned_locks(locking.LEVEL_NODE)), \
8074       "Not owning correct locks"
8075
8076     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8077
8078
8079 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8080   """Utility function to remove an instance.
8081
8082   """
8083   logging.info("Removing block devices for instance %s", instance.name)
8084
8085   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8086     if not ignore_failures:
8087       raise errors.OpExecError("Can't remove instance's disks")
8088     feedback_fn("Warning: can't remove instance's disks")
8089
8090   logging.info("Removing instance %s out of cluster config", instance.name)
8091
8092   lu.cfg.RemoveInstance(instance.name)
8093
8094   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8095     "Instance lock removal conflict"
8096
8097   # Remove lock for the instance
8098   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8099
8100
8101 class LUInstanceQuery(NoHooksLU):
8102   """Logical unit for querying instances.
8103
8104   """
8105   # pylint: disable=W0142
8106   REQ_BGL = False
8107
8108   def CheckArguments(self):
8109     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8110                              self.op.output_fields, self.op.use_locking)
8111
8112   def ExpandNames(self):
8113     self.iq.ExpandNames(self)
8114
8115   def DeclareLocks(self, level):
8116     self.iq.DeclareLocks(self, level)
8117
8118   def Exec(self, feedback_fn):
8119     return self.iq.OldStyleQuery(self)
8120
8121
8122 def _ExpandNamesForMigration(lu):
8123   """Expands names for use with L{TLMigrateInstance}.
8124
8125   @type lu: L{LogicalUnit}
8126
8127   """
8128   if lu.op.target_node is not None:
8129     lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8130
8131   lu.needed_locks[locking.LEVEL_NODE] = []
8132   lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8133
8134   lu.needed_locks[locking.LEVEL_NODE_RES] = []
8135   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8136
8137   # The node allocation lock is actually only needed for replicated instances
8138   # (e.g. DRBD8) and if an iallocator is used.
8139   lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8140
8141
8142 def _DeclareLocksForMigration(lu, level):
8143   """Declares locks for L{TLMigrateInstance}.
8144
8145   @type lu: L{LogicalUnit}
8146   @param level: Lock level
8147
8148   """
8149   if level == locking.LEVEL_NODE_ALLOC:
8150     assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8151
8152     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8153
8154     # Node locks are already declared here rather than at LEVEL_NODE as we need
8155     # the instance object anyway to declare the node allocation lock.
8156     if instance.disk_template in constants.DTS_EXT_MIRROR:
8157       if lu.op.target_node is None:
8158         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8159         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8160       else:
8161         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8162                                                lu.op.target_node]
8163       del lu.recalculate_locks[locking.LEVEL_NODE]
8164     else:
8165       lu._LockInstancesNodes() # pylint: disable=W0212
8166
8167   elif level == locking.LEVEL_NODE:
8168     # Node locks are declared together with the node allocation lock
8169     assert (lu.needed_locks[locking.LEVEL_NODE] or
8170             lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8171
8172   elif level == locking.LEVEL_NODE_RES:
8173     # Copy node locks
8174     lu.needed_locks[locking.LEVEL_NODE_RES] = \
8175       _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8176
8177
8178 class LUInstanceFailover(LogicalUnit):
8179   """Failover an instance.
8180
8181   """
8182   HPATH = "instance-failover"
8183   HTYPE = constants.HTYPE_INSTANCE
8184   REQ_BGL = False
8185
8186   def CheckArguments(self):
8187     """Check the arguments.
8188
8189     """
8190     self.iallocator = getattr(self.op, "iallocator", None)
8191     self.target_node = getattr(self.op, "target_node", None)
8192
8193   def ExpandNames(self):
8194     self._ExpandAndLockInstance()
8195     _ExpandNamesForMigration(self)
8196
8197     self._migrater = \
8198       TLMigrateInstance(self, self.op.instance_name, False, True, False,
8199                         self.op.ignore_consistency, True,
8200                         self.op.shutdown_timeout, self.op.ignore_ipolicy)
8201
8202     self.tasklets = [self._migrater]
8203
8204   def DeclareLocks(self, level):
8205     _DeclareLocksForMigration(self, level)
8206
8207   def BuildHooksEnv(self):
8208     """Build hooks env.
8209
8210     This runs on master, primary and secondary nodes of the instance.
8211
8212     """
8213     instance = self._migrater.instance
8214     source_node = instance.primary_node
8215     target_node = self.op.target_node
8216     env = {
8217       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8218       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8219       "OLD_PRIMARY": source_node,
8220       "NEW_PRIMARY": target_node,
8221       }
8222
8223     if instance.disk_template in constants.DTS_INT_MIRROR:
8224       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8225       env["NEW_SECONDARY"] = source_node
8226     else:
8227       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8228
8229     env.update(_BuildInstanceHookEnvByObject(self, instance))
8230
8231     return env
8232
8233   def BuildHooksNodes(self):
8234     """Build hooks nodes.
8235
8236     """
8237     instance = self._migrater.instance
8238     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8239     return (nl, nl + [instance.primary_node])
8240
8241
8242 class LUInstanceMigrate(LogicalUnit):
8243   """Migrate an instance.
8244
8245   This is migration without shutting down, compared to the failover,
8246   which is done with shutdown.
8247
8248   """
8249   HPATH = "instance-migrate"
8250   HTYPE = constants.HTYPE_INSTANCE
8251   REQ_BGL = False
8252
8253   def ExpandNames(self):
8254     self._ExpandAndLockInstance()
8255     _ExpandNamesForMigration(self)
8256
8257     self._migrater = \
8258       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8259                         False, self.op.allow_failover, False,
8260                         self.op.allow_runtime_changes,
8261                         constants.DEFAULT_SHUTDOWN_TIMEOUT,
8262                         self.op.ignore_ipolicy)
8263
8264     self.tasklets = [self._migrater]
8265
8266   def DeclareLocks(self, level):
8267     _DeclareLocksForMigration(self, level)
8268
8269   def BuildHooksEnv(self):
8270     """Build hooks env.
8271
8272     This runs on master, primary and secondary nodes of the instance.
8273
8274     """
8275     instance = self._migrater.instance
8276     source_node = instance.primary_node
8277     target_node = self.op.target_node
8278     env = _BuildInstanceHookEnvByObject(self, instance)
8279     env.update({
8280       "MIGRATE_LIVE": self._migrater.live,
8281       "MIGRATE_CLEANUP": self.op.cleanup,
8282       "OLD_PRIMARY": source_node,
8283       "NEW_PRIMARY": target_node,
8284       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8285       })
8286
8287     if instance.disk_template in constants.DTS_INT_MIRROR:
8288       env["OLD_SECONDARY"] = target_node
8289       env["NEW_SECONDARY"] = source_node
8290     else:
8291       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8292
8293     return env
8294
8295   def BuildHooksNodes(self):
8296     """Build hooks nodes.
8297
8298     """
8299     instance = self._migrater.instance
8300     snodes = list(instance.secondary_nodes)
8301     nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8302     return (nl, nl)
8303
8304
8305 class LUInstanceMove(LogicalUnit):
8306   """Move an instance by data-copying.
8307
8308   """
8309   HPATH = "instance-move"
8310   HTYPE = constants.HTYPE_INSTANCE
8311   REQ_BGL = False
8312
8313   def ExpandNames(self):
8314     self._ExpandAndLockInstance()
8315     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8316     self.op.target_node = target_node
8317     self.needed_locks[locking.LEVEL_NODE] = [target_node]
8318     self.needed_locks[locking.LEVEL_NODE_RES] = []
8319     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8320
8321   def DeclareLocks(self, level):
8322     if level == locking.LEVEL_NODE:
8323       self._LockInstancesNodes(primary_only=True)
8324     elif level == locking.LEVEL_NODE_RES:
8325       # Copy node locks
8326       self.needed_locks[locking.LEVEL_NODE_RES] = \
8327         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8328
8329   def BuildHooksEnv(self):
8330     """Build hooks env.
8331
8332     This runs on master, primary and secondary nodes of the instance.
8333
8334     """
8335     env = {
8336       "TARGET_NODE": self.op.target_node,
8337       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8338       }
8339     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8340     return env
8341
8342   def BuildHooksNodes(self):
8343     """Build hooks nodes.
8344
8345     """
8346     nl = [
8347       self.cfg.GetMasterNode(),
8348       self.instance.primary_node,
8349       self.op.target_node,
8350       ]
8351     return (nl, nl)
8352
8353   def CheckPrereq(self):
8354     """Check prerequisites.
8355
8356     This checks that the instance is in the cluster.
8357
8358     """
8359     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8360     assert self.instance is not None, \
8361       "Cannot retrieve locked instance %s" % self.op.instance_name
8362
8363     node = self.cfg.GetNodeInfo(self.op.target_node)
8364     assert node is not None, \
8365       "Cannot retrieve locked node %s" % self.op.target_node
8366
8367     self.target_node = target_node = node.name
8368
8369     if target_node == instance.primary_node:
8370       raise errors.OpPrereqError("Instance %s is already on the node %s" %
8371                                  (instance.name, target_node),
8372                                  errors.ECODE_STATE)
8373
8374     bep = self.cfg.GetClusterInfo().FillBE(instance)
8375
8376     for idx, dsk in enumerate(instance.disks):
8377       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8378         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8379                                    " cannot copy" % idx, errors.ECODE_STATE)
8380
8381     _CheckNodeOnline(self, target_node)
8382     _CheckNodeNotDrained(self, target_node)
8383     _CheckNodeVmCapable(self, target_node)
8384     cluster = self.cfg.GetClusterInfo()
8385     group_info = self.cfg.GetNodeGroup(node.group)
8386     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8387     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8388                             ignore=self.op.ignore_ipolicy)
8389
8390     if instance.admin_state == constants.ADMINST_UP:
8391       # check memory requirements on the secondary node
8392       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8393                            instance.name, bep[constants.BE_MAXMEM],
8394                            instance.hypervisor)
8395     else:
8396       self.LogInfo("Not checking memory on the secondary node as"
8397                    " instance will not be started")
8398
8399     # check bridge existance
8400     _CheckInstanceBridgesExist(self, instance, node=target_node)
8401
8402   def Exec(self, feedback_fn):
8403     """Move an instance.
8404
8405     The move is done by shutting it down on its present node, copying
8406     the data over (slow) and starting it on the new node.
8407
8408     """
8409     instance = self.instance
8410
8411     source_node = instance.primary_node
8412     target_node = self.target_node
8413
8414     self.LogInfo("Shutting down instance %s on source node %s",
8415                  instance.name, source_node)
8416
8417     assert (self.owned_locks(locking.LEVEL_NODE) ==
8418             self.owned_locks(locking.LEVEL_NODE_RES))
8419
8420     result = self.rpc.call_instance_shutdown(source_node, instance,
8421                                              self.op.shutdown_timeout)
8422     msg = result.fail_msg
8423     if msg:
8424       if self.op.ignore_consistency:
8425         self.LogWarning("Could not shutdown instance %s on node %s."
8426                         " Proceeding anyway. Please make sure node"
8427                         " %s is down. Error details: %s",
8428                         instance.name, source_node, source_node, msg)
8429       else:
8430         raise errors.OpExecError("Could not shutdown instance %s on"
8431                                  " node %s: %s" %
8432                                  (instance.name, source_node, msg))
8433
8434     # create the target disks
8435     try:
8436       _CreateDisks(self, instance, target_node=target_node)
8437     except errors.OpExecError:
8438       self.LogWarning("Device creation failed, reverting...")
8439       try:
8440         _RemoveDisks(self, instance, target_node=target_node)
8441       finally:
8442         self.cfg.ReleaseDRBDMinors(instance.name)
8443         raise
8444
8445     cluster_name = self.cfg.GetClusterInfo().cluster_name
8446
8447     errs = []
8448     # activate, get path, copy the data over
8449     for idx, disk in enumerate(instance.disks):
8450       self.LogInfo("Copying data for disk %d", idx)
8451       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8452                                                instance.name, True, idx)
8453       if result.fail_msg:
8454         self.LogWarning("Can't assemble newly created disk %d: %s",
8455                         idx, result.fail_msg)
8456         errs.append(result.fail_msg)
8457         break
8458       dev_path = result.payload
8459       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8460                                              target_node, dev_path,
8461                                              cluster_name)
8462       if result.fail_msg:
8463         self.LogWarning("Can't copy data over for disk %d: %s",
8464                         idx, result.fail_msg)
8465         errs.append(result.fail_msg)
8466         break
8467
8468     if errs:
8469       self.LogWarning("Some disks failed to copy, aborting")
8470       try:
8471         _RemoveDisks(self, instance, target_node=target_node)
8472       finally:
8473         self.cfg.ReleaseDRBDMinors(instance.name)
8474         raise errors.OpExecError("Errors during disk copy: %s" %
8475                                  (",".join(errs),))
8476
8477     instance.primary_node = target_node
8478     self.cfg.Update(instance, feedback_fn)
8479
8480     self.LogInfo("Removing the disks on the original node")
8481     _RemoveDisks(self, instance, target_node=source_node)
8482
8483     # Only start the instance if it's marked as up
8484     if instance.admin_state == constants.ADMINST_UP:
8485       self.LogInfo("Starting instance %s on node %s",
8486                    instance.name, target_node)
8487
8488       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8489                                            ignore_secondaries=True)
8490       if not disks_ok:
8491         _ShutdownInstanceDisks(self, instance)
8492         raise errors.OpExecError("Can't activate the instance's disks")
8493
8494       result = self.rpc.call_instance_start(target_node,
8495                                             (instance, None, None), False)
8496       msg = result.fail_msg
8497       if msg:
8498         _ShutdownInstanceDisks(self, instance)
8499         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8500                                  (instance.name, target_node, msg))
8501
8502
8503 class LUNodeMigrate(LogicalUnit):
8504   """Migrate all instances from a node.
8505
8506   """
8507   HPATH = "node-migrate"
8508   HTYPE = constants.HTYPE_NODE
8509   REQ_BGL = False
8510
8511   def CheckArguments(self):
8512     pass
8513
8514   def ExpandNames(self):
8515     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8516
8517     self.share_locks = _ShareAll()
8518     self.needed_locks = {
8519       locking.LEVEL_NODE: [self.op.node_name],
8520       }
8521
8522   def BuildHooksEnv(self):
8523     """Build hooks env.
8524
8525     This runs on the master, the primary and all the secondaries.
8526
8527     """
8528     return {
8529       "NODE_NAME": self.op.node_name,
8530       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8531       }
8532
8533   def BuildHooksNodes(self):
8534     """Build hooks nodes.
8535
8536     """
8537     nl = [self.cfg.GetMasterNode()]
8538     return (nl, nl)
8539
8540   def CheckPrereq(self):
8541     pass
8542
8543   def Exec(self, feedback_fn):
8544     # Prepare jobs for migration instances
8545     allow_runtime_changes = self.op.allow_runtime_changes
8546     jobs = [
8547       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8548                                  mode=self.op.mode,
8549                                  live=self.op.live,
8550                                  iallocator=self.op.iallocator,
8551                                  target_node=self.op.target_node,
8552                                  allow_runtime_changes=allow_runtime_changes,
8553                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8554       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8555
8556     # TODO: Run iallocator in this opcode and pass correct placement options to
8557     # OpInstanceMigrate. Since other jobs can modify the cluster between
8558     # running the iallocator and the actual migration, a good consistency model
8559     # will have to be found.
8560
8561     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8562             frozenset([self.op.node_name]))
8563
8564     return ResultWithJobs(jobs)
8565
8566
8567 class TLMigrateInstance(Tasklet):
8568   """Tasklet class for instance migration.
8569
8570   @type live: boolean
8571   @ivar live: whether the migration will be done live or non-live;
8572       this variable is initalized only after CheckPrereq has run
8573   @type cleanup: boolean
8574   @ivar cleanup: Wheater we cleanup from a failed migration
8575   @type iallocator: string
8576   @ivar iallocator: The iallocator used to determine target_node
8577   @type target_node: string
8578   @ivar target_node: If given, the target_node to reallocate the instance to
8579   @type failover: boolean
8580   @ivar failover: Whether operation results in failover or migration
8581   @type fallback: boolean
8582   @ivar fallback: Whether fallback to failover is allowed if migration not
8583                   possible
8584   @type ignore_consistency: boolean
8585   @ivar ignore_consistency: Wheter we should ignore consistency between source
8586                             and target node
8587   @type shutdown_timeout: int
8588   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8589   @type ignore_ipolicy: bool
8590   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8591
8592   """
8593
8594   # Constants
8595   _MIGRATION_POLL_INTERVAL = 1      # seconds
8596   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8597
8598   def __init__(self, lu, instance_name, cleanup, failover, fallback,
8599                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8600                ignore_ipolicy):
8601     """Initializes this class.
8602
8603     """
8604     Tasklet.__init__(self, lu)
8605
8606     # Parameters
8607     self.instance_name = instance_name
8608     self.cleanup = cleanup
8609     self.live = False # will be overridden later
8610     self.failover = failover
8611     self.fallback = fallback
8612     self.ignore_consistency = ignore_consistency
8613     self.shutdown_timeout = shutdown_timeout
8614     self.ignore_ipolicy = ignore_ipolicy
8615     self.allow_runtime_changes = allow_runtime_changes
8616
8617   def CheckPrereq(self):
8618     """Check prerequisites.
8619
8620     This checks that the instance is in the cluster.
8621
8622     """
8623     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8624     instance = self.cfg.GetInstanceInfo(instance_name)
8625     assert instance is not None
8626     self.instance = instance
8627     cluster = self.cfg.GetClusterInfo()
8628
8629     if (not self.cleanup and
8630         not instance.admin_state == constants.ADMINST_UP and
8631         not self.failover and self.fallback):
8632       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8633                       " switching to failover")
8634       self.failover = True
8635
8636     if instance.disk_template not in constants.DTS_MIRRORED:
8637       if self.failover:
8638         text = "failovers"
8639       else:
8640         text = "migrations"
8641       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8642                                  " %s" % (instance.disk_template, text),
8643                                  errors.ECODE_STATE)
8644
8645     if instance.disk_template in constants.DTS_EXT_MIRROR:
8646       assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8647
8648       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8649
8650       if self.lu.op.iallocator:
8651         self._RunAllocator()
8652       else:
8653         # We set set self.target_node as it is required by
8654         # BuildHooksEnv
8655         self.target_node = self.lu.op.target_node
8656
8657       # Check that the target node is correct in terms of instance policy
8658       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8659       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8660       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8661                                                               group_info)
8662       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8663                               ignore=self.ignore_ipolicy)
8664
8665       # self.target_node is already populated, either directly or by the
8666       # iallocator run
8667       target_node = self.target_node
8668       if self.target_node == instance.primary_node:
8669         raise errors.OpPrereqError("Cannot migrate instance %s"
8670                                    " to its primary (%s)" %
8671                                    (instance.name, instance.primary_node),
8672                                    errors.ECODE_STATE)
8673
8674       if len(self.lu.tasklets) == 1:
8675         # It is safe to release locks only when we're the only tasklet
8676         # in the LU
8677         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8678                       keep=[instance.primary_node, self.target_node])
8679         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8680
8681     else:
8682       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8683
8684       secondary_nodes = instance.secondary_nodes
8685       if not secondary_nodes:
8686         raise errors.ConfigurationError("No secondary node but using"
8687                                         " %s disk template" %
8688                                         instance.disk_template)
8689       target_node = secondary_nodes[0]
8690       if self.lu.op.iallocator or (self.lu.op.target_node and
8691                                    self.lu.op.target_node != target_node):
8692         if self.failover:
8693           text = "failed over"
8694         else:
8695           text = "migrated"
8696         raise errors.OpPrereqError("Instances with disk template %s cannot"
8697                                    " be %s to arbitrary nodes"
8698                                    " (neither an iallocator nor a target"
8699                                    " node can be passed)" %
8700                                    (instance.disk_template, text),
8701                                    errors.ECODE_INVAL)
8702       nodeinfo = self.cfg.GetNodeInfo(target_node)
8703       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8704       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8705                                                               group_info)
8706       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8707                               ignore=self.ignore_ipolicy)
8708
8709     i_be = cluster.FillBE(instance)
8710
8711     # check memory requirements on the secondary node
8712     if (not self.cleanup and
8713          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8714       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8715                                                "migrating instance %s" %
8716                                                instance.name,
8717                                                i_be[constants.BE_MINMEM],
8718                                                instance.hypervisor)
8719     else:
8720       self.lu.LogInfo("Not checking memory on the secondary node as"
8721                       " instance will not be started")
8722
8723     # check if failover must be forced instead of migration
8724     if (not self.cleanup and not self.failover and
8725         i_be[constants.BE_ALWAYS_FAILOVER]):
8726       self.lu.LogInfo("Instance configured to always failover; fallback"
8727                       " to failover")
8728       self.failover = True
8729
8730     # check bridge existance
8731     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8732
8733     if not self.cleanup:
8734       _CheckNodeNotDrained(self.lu, target_node)
8735       if not self.failover:
8736         result = self.rpc.call_instance_migratable(instance.primary_node,
8737                                                    instance)
8738         if result.fail_msg and self.fallback:
8739           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8740                           " failover")
8741           self.failover = True
8742         else:
8743           result.Raise("Can't migrate, please use failover",
8744                        prereq=True, ecode=errors.ECODE_STATE)
8745
8746     assert not (self.failover and self.cleanup)
8747
8748     if not self.failover:
8749       if self.lu.op.live is not None and self.lu.op.mode is not None:
8750         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8751                                    " parameters are accepted",
8752                                    errors.ECODE_INVAL)
8753       if self.lu.op.live is not None:
8754         if self.lu.op.live:
8755           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8756         else:
8757           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8758         # reset the 'live' parameter to None so that repeated
8759         # invocations of CheckPrereq do not raise an exception
8760         self.lu.op.live = None
8761       elif self.lu.op.mode is None:
8762         # read the default value from the hypervisor
8763         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8764         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8765
8766       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8767     else:
8768       # Failover is never live
8769       self.live = False
8770
8771     if not (self.failover or self.cleanup):
8772       remote_info = self.rpc.call_instance_info(instance.primary_node,
8773                                                 instance.name,
8774                                                 instance.hypervisor)
8775       remote_info.Raise("Error checking instance on node %s" %
8776                         instance.primary_node)
8777       instance_running = bool(remote_info.payload)
8778       if instance_running:
8779         self.current_mem = int(remote_info.payload["memory"])
8780
8781   def _RunAllocator(self):
8782     """Run the allocator based on input opcode.
8783
8784     """
8785     assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8786
8787     # FIXME: add a self.ignore_ipolicy option
8788     req = iallocator.IAReqRelocate(name=self.instance_name,
8789                                    relocate_from=[self.instance.primary_node])
8790     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8791
8792     ial.Run(self.lu.op.iallocator)
8793
8794     if not ial.success:
8795       raise errors.OpPrereqError("Can't compute nodes using"
8796                                  " iallocator '%s': %s" %
8797                                  (self.lu.op.iallocator, ial.info),
8798                                  errors.ECODE_NORES)
8799     self.target_node = ial.result[0]
8800     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8801                     self.instance_name, self.lu.op.iallocator,
8802                     utils.CommaJoin(ial.result))
8803
8804   def _WaitUntilSync(self):
8805     """Poll with custom rpc for disk sync.
8806
8807     This uses our own step-based rpc call.
8808
8809     """
8810     self.feedback_fn("* wait until resync is done")
8811     all_done = False
8812     while not all_done:
8813       all_done = True
8814       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8815                                             self.nodes_ip,
8816                                             (self.instance.disks,
8817                                              self.instance))
8818       min_percent = 100
8819       for node, nres in result.items():
8820         nres.Raise("Cannot resync disks on node %s" % node)
8821         node_done, node_percent = nres.payload
8822         all_done = all_done and node_done
8823         if node_percent is not None:
8824           min_percent = min(min_percent, node_percent)
8825       if not all_done:
8826         if min_percent < 100:
8827           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8828         time.sleep(2)
8829
8830   def _EnsureSecondary(self, node):
8831     """Demote a node to secondary.
8832
8833     """
8834     self.feedback_fn("* switching node %s to secondary mode" % node)
8835
8836     for dev in self.instance.disks:
8837       self.cfg.SetDiskID(dev, node)
8838
8839     result = self.rpc.call_blockdev_close(node, self.instance.name,
8840                                           self.instance.disks)
8841     result.Raise("Cannot change disk to secondary on node %s" % node)
8842
8843   def _GoStandalone(self):
8844     """Disconnect from the network.
8845
8846     """
8847     self.feedback_fn("* changing into standalone mode")
8848     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8849                                                self.instance.disks)
8850     for node, nres in result.items():
8851       nres.Raise("Cannot disconnect disks node %s" % node)
8852
8853   def _GoReconnect(self, multimaster):
8854     """Reconnect to the network.
8855
8856     """
8857     if multimaster:
8858       msg = "dual-master"
8859     else:
8860       msg = "single-master"
8861     self.feedback_fn("* changing disks into %s mode" % msg)
8862     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8863                                            (self.instance.disks, self.instance),
8864                                            self.instance.name, multimaster)
8865     for node, nres in result.items():
8866       nres.Raise("Cannot change disks config on node %s" % node)
8867
8868   def _ExecCleanup(self):
8869     """Try to cleanup after a failed migration.
8870
8871     The cleanup is done by:
8872       - check that the instance is running only on one node
8873         (and update the config if needed)
8874       - change disks on its secondary node to secondary
8875       - wait until disks are fully synchronized
8876       - disconnect from the network
8877       - change disks into single-master mode
8878       - wait again until disks are fully synchronized
8879
8880     """
8881     instance = self.instance
8882     target_node = self.target_node
8883     source_node = self.source_node
8884
8885     # check running on only one node
8886     self.feedback_fn("* checking where the instance actually runs"
8887                      " (if this hangs, the hypervisor might be in"
8888                      " a bad state)")
8889     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8890     for node, result in ins_l.items():
8891       result.Raise("Can't contact node %s" % node)
8892
8893     runningon_source = instance.name in ins_l[source_node].payload
8894     runningon_target = instance.name in ins_l[target_node].payload
8895
8896     if runningon_source and runningon_target:
8897       raise errors.OpExecError("Instance seems to be running on two nodes,"
8898                                " or the hypervisor is confused; you will have"
8899                                " to ensure manually that it runs only on one"
8900                                " and restart this operation")
8901
8902     if not (runningon_source or runningon_target):
8903       raise errors.OpExecError("Instance does not seem to be running at all;"
8904                                " in this case it's safer to repair by"
8905                                " running 'gnt-instance stop' to ensure disk"
8906                                " shutdown, and then restarting it")
8907
8908     if runningon_target:
8909       # the migration has actually succeeded, we need to update the config
8910       self.feedback_fn("* instance running on secondary node (%s),"
8911                        " updating config" % target_node)
8912       instance.primary_node = target_node
8913       self.cfg.Update(instance, self.feedback_fn)
8914       demoted_node = source_node
8915     else:
8916       self.feedback_fn("* instance confirmed to be running on its"
8917                        " primary node (%s)" % source_node)
8918       demoted_node = target_node
8919
8920     if instance.disk_template in constants.DTS_INT_MIRROR:
8921       self._EnsureSecondary(demoted_node)
8922       try:
8923         self._WaitUntilSync()
8924       except errors.OpExecError:
8925         # we ignore here errors, since if the device is standalone, it
8926         # won't be able to sync
8927         pass
8928       self._GoStandalone()
8929       self._GoReconnect(False)
8930       self._WaitUntilSync()
8931
8932     self.feedback_fn("* done")
8933
8934   def _RevertDiskStatus(self):
8935     """Try to revert the disk status after a failed migration.
8936
8937     """
8938     target_node = self.target_node
8939     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8940       return
8941
8942     try:
8943       self._EnsureSecondary(target_node)
8944       self._GoStandalone()
8945       self._GoReconnect(False)
8946       self._WaitUntilSync()
8947     except errors.OpExecError, err:
8948       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8949                          " please try to recover the instance manually;"
8950                          " error '%s'" % str(err))
8951
8952   def _AbortMigration(self):
8953     """Call the hypervisor code to abort a started migration.
8954
8955     """
8956     instance = self.instance
8957     target_node = self.target_node
8958     source_node = self.source_node
8959     migration_info = self.migration_info
8960
8961     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8962                                                                  instance,
8963                                                                  migration_info,
8964                                                                  False)
8965     abort_msg = abort_result.fail_msg
8966     if abort_msg:
8967       logging.error("Aborting migration failed on target node %s: %s",
8968                     target_node, abort_msg)
8969       # Don't raise an exception here, as we stil have to try to revert the
8970       # disk status, even if this step failed.
8971
8972     abort_result = self.rpc.call_instance_finalize_migration_src(
8973       source_node, instance, False, self.live)
8974     abort_msg = abort_result.fail_msg
8975     if abort_msg:
8976       logging.error("Aborting migration failed on source node %s: %s",
8977                     source_node, abort_msg)
8978
8979   def _ExecMigration(self):
8980     """Migrate an instance.
8981
8982     The migrate is done by:
8983       - change the disks into dual-master mode
8984       - wait until disks are fully synchronized again
8985       - migrate the instance
8986       - change disks on the new secondary node (the old primary) to secondary
8987       - wait until disks are fully synchronized
8988       - change disks into single-master mode
8989
8990     """
8991     instance = self.instance
8992     target_node = self.target_node
8993     source_node = self.source_node
8994
8995     # Check for hypervisor version mismatch and warn the user.
8996     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8997                                        None, [self.instance.hypervisor], False)
8998     for ninfo in nodeinfo.values():
8999       ninfo.Raise("Unable to retrieve node information from node '%s'" %
9000                   ninfo.node)
9001     (_, _, (src_info, )) = nodeinfo[source_node].payload
9002     (_, _, (dst_info, )) = nodeinfo[target_node].payload
9003
9004     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9005         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9006       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9007       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9008       if src_version != dst_version:
9009         self.feedback_fn("* warning: hypervisor version mismatch between"
9010                          " source (%s) and target (%s) node" %
9011                          (src_version, dst_version))
9012
9013     self.feedback_fn("* checking disk consistency between source and target")
9014     for (idx, dev) in enumerate(instance.disks):
9015       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9016         raise errors.OpExecError("Disk %s is degraded or not fully"
9017                                  " synchronized on target node,"
9018                                  " aborting migration" % idx)
9019
9020     if self.current_mem > self.tgt_free_mem:
9021       if not self.allow_runtime_changes:
9022         raise errors.OpExecError("Memory ballooning not allowed and not enough"
9023                                  " free memory to fit instance %s on target"
9024                                  " node %s (have %dMB, need %dMB)" %
9025                                  (instance.name, target_node,
9026                                   self.tgt_free_mem, self.current_mem))
9027       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9028       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9029                                                      instance,
9030                                                      self.tgt_free_mem)
9031       rpcres.Raise("Cannot modify instance runtime memory")
9032
9033     # First get the migration information from the remote node
9034     result = self.rpc.call_migration_info(source_node, instance)
9035     msg = result.fail_msg
9036     if msg:
9037       log_err = ("Failed fetching source migration information from %s: %s" %
9038                  (source_node, msg))
9039       logging.error(log_err)
9040       raise errors.OpExecError(log_err)
9041
9042     self.migration_info = migration_info = result.payload
9043
9044     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9045       # Then switch the disks to master/master mode
9046       self._EnsureSecondary(target_node)
9047       self._GoStandalone()
9048       self._GoReconnect(True)
9049       self._WaitUntilSync()
9050
9051     self.feedback_fn("* preparing %s to accept the instance" % target_node)
9052     result = self.rpc.call_accept_instance(target_node,
9053                                            instance,
9054                                            migration_info,
9055                                            self.nodes_ip[target_node])
9056
9057     msg = result.fail_msg
9058     if msg:
9059       logging.error("Instance pre-migration failed, trying to revert"
9060                     " disk status: %s", msg)
9061       self.feedback_fn("Pre-migration failed, aborting")
9062       self._AbortMigration()
9063       self._RevertDiskStatus()
9064       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9065                                (instance.name, msg))
9066
9067     self.feedback_fn("* migrating instance to %s" % target_node)
9068     result = self.rpc.call_instance_migrate(source_node, instance,
9069                                             self.nodes_ip[target_node],
9070                                             self.live)
9071     msg = result.fail_msg
9072     if msg:
9073       logging.error("Instance migration failed, trying to revert"
9074                     " disk status: %s", msg)
9075       self.feedback_fn("Migration failed, aborting")
9076       self._AbortMigration()
9077       self._RevertDiskStatus()
9078       raise errors.OpExecError("Could not migrate instance %s: %s" %
9079                                (instance.name, msg))
9080
9081     self.feedback_fn("* starting memory transfer")
9082     last_feedback = time.time()
9083     while True:
9084       result = self.rpc.call_instance_get_migration_status(source_node,
9085                                                            instance)
9086       msg = result.fail_msg
9087       ms = result.payload   # MigrationStatus instance
9088       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9089         logging.error("Instance migration failed, trying to revert"
9090                       " disk status: %s", msg)
9091         self.feedback_fn("Migration failed, aborting")
9092         self._AbortMigration()
9093         self._RevertDiskStatus()
9094         if not msg:
9095           msg = "hypervisor returned failure"
9096         raise errors.OpExecError("Could not migrate instance %s: %s" %
9097                                  (instance.name, msg))
9098
9099       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9100         self.feedback_fn("* memory transfer complete")
9101         break
9102
9103       if (utils.TimeoutExpired(last_feedback,
9104                                self._MIGRATION_FEEDBACK_INTERVAL) and
9105           ms.transferred_ram is not None):
9106         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9107         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9108         last_feedback = time.time()
9109
9110       time.sleep(self._MIGRATION_POLL_INTERVAL)
9111
9112     result = self.rpc.call_instance_finalize_migration_src(source_node,
9113                                                            instance,
9114                                                            True,
9115                                                            self.live)
9116     msg = result.fail_msg
9117     if msg:
9118       logging.error("Instance migration succeeded, but finalization failed"
9119                     " on the source node: %s", msg)
9120       raise errors.OpExecError("Could not finalize instance migration: %s" %
9121                                msg)
9122
9123     instance.primary_node = target_node
9124
9125     # distribute new instance config to the other nodes
9126     self.cfg.Update(instance, self.feedback_fn)
9127
9128     result = self.rpc.call_instance_finalize_migration_dst(target_node,
9129                                                            instance,
9130                                                            migration_info,
9131                                                            True)
9132     msg = result.fail_msg
9133     if msg:
9134       logging.error("Instance migration succeeded, but finalization failed"
9135                     " on the target node: %s", msg)
9136       raise errors.OpExecError("Could not finalize instance migration: %s" %
9137                                msg)
9138
9139     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9140       self._EnsureSecondary(source_node)
9141       self._WaitUntilSync()
9142       self._GoStandalone()
9143       self._GoReconnect(False)
9144       self._WaitUntilSync()
9145
9146     # If the instance's disk template is `rbd' or `ext' and there was a
9147     # successful migration, unmap the device from the source node.
9148     if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9149       disks = _ExpandCheckDisks(instance, instance.disks)
9150       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9151       for disk in disks:
9152         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9153         msg = result.fail_msg
9154         if msg:
9155           logging.error("Migration was successful, but couldn't unmap the"
9156                         " block device %s on source node %s: %s",
9157                         disk.iv_name, source_node, msg)
9158           logging.error("You need to unmap the device %s manually on %s",
9159                         disk.iv_name, source_node)
9160
9161     self.feedback_fn("* done")
9162
9163   def _ExecFailover(self):
9164     """Failover an instance.
9165
9166     The failover is done by shutting it down on its present node and
9167     starting it on the secondary.
9168
9169     """
9170     instance = self.instance
9171     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9172
9173     source_node = instance.primary_node
9174     target_node = self.target_node
9175
9176     if instance.admin_state == constants.ADMINST_UP:
9177       self.feedback_fn("* checking disk consistency between source and target")
9178       for (idx, dev) in enumerate(instance.disks):
9179         # for drbd, these are drbd over lvm
9180         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9181                                      False):
9182           if primary_node.offline:
9183             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9184                              " target node %s" %
9185                              (primary_node.name, idx, target_node))
9186           elif not self.ignore_consistency:
9187             raise errors.OpExecError("Disk %s is degraded on target node,"
9188                                      " aborting failover" % idx)
9189     else:
9190       self.feedback_fn("* not checking disk consistency as instance is not"
9191                        " running")
9192
9193     self.feedback_fn("* shutting down instance on source node")
9194     logging.info("Shutting down instance %s on node %s",
9195                  instance.name, source_node)
9196
9197     result = self.rpc.call_instance_shutdown(source_node, instance,
9198                                              self.shutdown_timeout)
9199     msg = result.fail_msg
9200     if msg:
9201       if self.ignore_consistency or primary_node.offline:
9202         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9203                            " proceeding anyway; please make sure node"
9204                            " %s is down; error details: %s",
9205                            instance.name, source_node, source_node, msg)
9206       else:
9207         raise errors.OpExecError("Could not shutdown instance %s on"
9208                                  " node %s: %s" %
9209                                  (instance.name, source_node, msg))
9210
9211     self.feedback_fn("* deactivating the instance's disks on source node")
9212     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9213       raise errors.OpExecError("Can't shut down the instance's disks")
9214
9215     instance.primary_node = target_node
9216     # distribute new instance config to the other nodes
9217     self.cfg.Update(instance, self.feedback_fn)
9218
9219     # Only start the instance if it's marked as up
9220     if instance.admin_state == constants.ADMINST_UP:
9221       self.feedback_fn("* activating the instance's disks on target node %s" %
9222                        target_node)
9223       logging.info("Starting instance %s on node %s",
9224                    instance.name, target_node)
9225
9226       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9227                                            ignore_secondaries=True)
9228       if not disks_ok:
9229         _ShutdownInstanceDisks(self.lu, instance)
9230         raise errors.OpExecError("Can't activate the instance's disks")
9231
9232       self.feedback_fn("* starting the instance on the target node %s" %
9233                        target_node)
9234       result = self.rpc.call_instance_start(target_node, (instance, None, None),
9235                                             False)
9236       msg = result.fail_msg
9237       if msg:
9238         _ShutdownInstanceDisks(self.lu, instance)
9239         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9240                                  (instance.name, target_node, msg))
9241
9242   def Exec(self, feedback_fn):
9243     """Perform the migration.
9244
9245     """
9246     self.feedback_fn = feedback_fn
9247     self.source_node = self.instance.primary_node
9248
9249     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9250     if self.instance.disk_template in constants.DTS_INT_MIRROR:
9251       self.target_node = self.instance.secondary_nodes[0]
9252       # Otherwise self.target_node has been populated either
9253       # directly, or through an iallocator.
9254
9255     self.all_nodes = [self.source_node, self.target_node]
9256     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9257                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
9258
9259     if self.failover:
9260       feedback_fn("Failover instance %s" % self.instance.name)
9261       self._ExecFailover()
9262     else:
9263       feedback_fn("Migrating instance %s" % self.instance.name)
9264
9265       if self.cleanup:
9266         return self._ExecCleanup()
9267       else:
9268         return self._ExecMigration()
9269
9270
9271 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9272                     force_open):
9273   """Wrapper around L{_CreateBlockDevInner}.
9274
9275   This method annotates the root device first.
9276
9277   """
9278   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9279   excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9280   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9281                               force_open, excl_stor)
9282
9283
9284 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9285                          info, force_open, excl_stor):
9286   """Create a tree of block devices on a given node.
9287
9288   If this device type has to be created on secondaries, create it and
9289   all its children.
9290
9291   If not, just recurse to children keeping the same 'force' value.
9292
9293   @attention: The device has to be annotated already.
9294
9295   @param lu: the lu on whose behalf we execute
9296   @param node: the node on which to create the device
9297   @type instance: L{objects.Instance}
9298   @param instance: the instance which owns the device
9299   @type device: L{objects.Disk}
9300   @param device: the device to create
9301   @type force_create: boolean
9302   @param force_create: whether to force creation of this device; this
9303       will be change to True whenever we find a device which has
9304       CreateOnSecondary() attribute
9305   @param info: the extra 'metadata' we should attach to the device
9306       (this will be represented as a LVM tag)
9307   @type force_open: boolean
9308   @param force_open: this parameter will be passes to the
9309       L{backend.BlockdevCreate} function where it specifies
9310       whether we run on primary or not, and it affects both
9311       the child assembly and the device own Open() execution
9312   @type excl_stor: boolean
9313   @param excl_stor: Whether exclusive_storage is active for the node
9314
9315   """
9316   if device.CreateOnSecondary():
9317     force_create = True
9318
9319   if device.children:
9320     for child in device.children:
9321       _CreateBlockDevInner(lu, node, instance, child, force_create,
9322                            info, force_open, excl_stor)
9323
9324   if not force_create:
9325     return
9326
9327   _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9328                         excl_stor)
9329
9330
9331 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9332                           excl_stor):
9333   """Create a single block device on a given node.
9334
9335   This will not recurse over children of the device, so they must be
9336   created in advance.
9337
9338   @param lu: the lu on whose behalf we execute
9339   @param node: the node on which to create the device
9340   @type instance: L{objects.Instance}
9341   @param instance: the instance which owns the device
9342   @type device: L{objects.Disk}
9343   @param device: the device to create
9344   @param info: the extra 'metadata' we should attach to the device
9345       (this will be represented as a LVM tag)
9346   @type force_open: boolean
9347   @param force_open: this parameter will be passes to the
9348       L{backend.BlockdevCreate} function where it specifies
9349       whether we run on primary or not, and it affects both
9350       the child assembly and the device own Open() execution
9351   @type excl_stor: boolean
9352   @param excl_stor: Whether exclusive_storage is active for the node
9353
9354   """
9355   lu.cfg.SetDiskID(device, node)
9356   result = lu.rpc.call_blockdev_create(node, device, device.size,
9357                                        instance.name, force_open, info,
9358                                        excl_stor)
9359   result.Raise("Can't create block device %s on"
9360                " node %s for instance %s" % (device, node, instance.name))
9361   if device.physical_id is None:
9362     device.physical_id = result.payload
9363
9364
9365 def _GenerateUniqueNames(lu, exts):
9366   """Generate a suitable LV name.
9367
9368   This will generate a logical volume name for the given instance.
9369
9370   """
9371   results = []
9372   for val in exts:
9373     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9374     results.append("%s%s" % (new_id, val))
9375   return results
9376
9377
9378 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9379                          iv_name, p_minor, s_minor):
9380   """Generate a drbd8 device complete with its children.
9381
9382   """
9383   assert len(vgnames) == len(names) == 2
9384   port = lu.cfg.AllocatePort()
9385   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9386
9387   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9388                           logical_id=(vgnames[0], names[0]),
9389                           params={})
9390   dev_meta = objects.Disk(dev_type=constants.LD_LV,
9391                           size=constants.DRBD_META_SIZE,
9392                           logical_id=(vgnames[1], names[1]),
9393                           params={})
9394   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9395                           logical_id=(primary, secondary, port,
9396                                       p_minor, s_minor,
9397                                       shared_secret),
9398                           children=[dev_data, dev_meta],
9399                           iv_name=iv_name, params={})
9400   return drbd_dev
9401
9402
9403 _DISK_TEMPLATE_NAME_PREFIX = {
9404   constants.DT_PLAIN: "",
9405   constants.DT_RBD: ".rbd",
9406   constants.DT_EXT: ".ext",
9407   }
9408
9409
9410 _DISK_TEMPLATE_DEVICE_TYPE = {
9411   constants.DT_PLAIN: constants.LD_LV,
9412   constants.DT_FILE: constants.LD_FILE,
9413   constants.DT_SHARED_FILE: constants.LD_FILE,
9414   constants.DT_BLOCK: constants.LD_BLOCKDEV,
9415   constants.DT_RBD: constants.LD_RBD,
9416   constants.DT_EXT: constants.LD_EXT,
9417   }
9418
9419
9420 def _GenerateDiskTemplate(
9421   lu, template_name, instance_name, primary_node, secondary_nodes,
9422   disk_info, file_storage_dir, file_driver, base_index,
9423   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9424   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9425   """Generate the entire disk layout for a given template type.
9426
9427   """
9428   vgname = lu.cfg.GetVGName()
9429   disk_count = len(disk_info)
9430   disks = []
9431
9432   if template_name == constants.DT_DISKLESS:
9433     pass
9434   elif template_name == constants.DT_DRBD8:
9435     if len(secondary_nodes) != 1:
9436       raise errors.ProgrammerError("Wrong template configuration")
9437     remote_node = secondary_nodes[0]
9438     minors = lu.cfg.AllocateDRBDMinor(
9439       [primary_node, remote_node] * len(disk_info), instance_name)
9440
9441     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9442                                                        full_disk_params)
9443     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9444
9445     names = []
9446     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9447                                                for i in range(disk_count)]):
9448       names.append(lv_prefix + "_data")
9449       names.append(lv_prefix + "_meta")
9450     for idx, disk in enumerate(disk_info):
9451       disk_index = idx + base_index
9452       data_vg = disk.get(constants.IDISK_VG, vgname)
9453       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9454       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9455                                       disk[constants.IDISK_SIZE],
9456                                       [data_vg, meta_vg],
9457                                       names[idx * 2:idx * 2 + 2],
9458                                       "disk/%d" % disk_index,
9459                                       minors[idx * 2], minors[idx * 2 + 1])
9460       disk_dev.mode = disk[constants.IDISK_MODE]
9461       disks.append(disk_dev)
9462   else:
9463     if secondary_nodes:
9464       raise errors.ProgrammerError("Wrong template configuration")
9465
9466     if template_name == constants.DT_FILE:
9467       _req_file_storage()
9468     elif template_name == constants.DT_SHARED_FILE:
9469       _req_shr_file_storage()
9470
9471     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9472     if name_prefix is None:
9473       names = None
9474     else:
9475       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9476                                         (name_prefix, base_index + i)
9477                                         for i in range(disk_count)])
9478
9479     if template_name == constants.DT_PLAIN:
9480
9481       def logical_id_fn(idx, _, disk):
9482         vg = disk.get(constants.IDISK_VG, vgname)
9483         return (vg, names[idx])
9484
9485     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9486       logical_id_fn = \
9487         lambda _, disk_index, disk: (file_driver,
9488                                      "%s/disk%d" % (file_storage_dir,
9489                                                     disk_index))
9490     elif template_name == constants.DT_BLOCK:
9491       logical_id_fn = \
9492         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9493                                        disk[constants.IDISK_ADOPT])
9494     elif template_name == constants.DT_RBD:
9495       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9496     elif template_name == constants.DT_EXT:
9497       def logical_id_fn(idx, _, disk):
9498         provider = disk.get(constants.IDISK_PROVIDER, None)
9499         if provider is None:
9500           raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9501                                        " not found", constants.DT_EXT,
9502                                        constants.IDISK_PROVIDER)
9503         return (provider, names[idx])
9504     else:
9505       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9506
9507     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9508
9509     for idx, disk in enumerate(disk_info):
9510       params = {}
9511       # Only for the Ext template add disk_info to params
9512       if template_name == constants.DT_EXT:
9513         params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9514         for key in disk:
9515           if key not in constants.IDISK_PARAMS:
9516             params[key] = disk[key]
9517       disk_index = idx + base_index
9518       size = disk[constants.IDISK_SIZE]
9519       feedback_fn("* disk %s, size %s" %
9520                   (disk_index, utils.FormatUnit(size, "h")))
9521       disks.append(objects.Disk(dev_type=dev_type, size=size,
9522                                 logical_id=logical_id_fn(idx, disk_index, disk),
9523                                 iv_name="disk/%d" % disk_index,
9524                                 mode=disk[constants.IDISK_MODE],
9525                                 params=params))
9526
9527   return disks
9528
9529
9530 def _GetInstanceInfoText(instance):
9531   """Compute that text that should be added to the disk's metadata.
9532
9533   """
9534   return "originstname+%s" % instance.name
9535
9536
9537 def _CalcEta(time_taken, written, total_size):
9538   """Calculates the ETA based on size written and total size.
9539
9540   @param time_taken: The time taken so far
9541   @param written: amount written so far
9542   @param total_size: The total size of data to be written
9543   @return: The remaining time in seconds
9544
9545   """
9546   avg_time = time_taken / float(written)
9547   return (total_size - written) * avg_time
9548
9549
9550 def _WipeDisks(lu, instance, disks=None):
9551   """Wipes instance disks.
9552
9553   @type lu: L{LogicalUnit}
9554   @param lu: the logical unit on whose behalf we execute
9555   @type instance: L{objects.Instance}
9556   @param instance: the instance whose disks we should create
9557   @return: the success of the wipe
9558
9559   """
9560   node = instance.primary_node
9561
9562   if disks is None:
9563     disks = [(idx, disk, 0)
9564              for (idx, disk) in enumerate(instance.disks)]
9565
9566   for (_, device, _) in disks:
9567     lu.cfg.SetDiskID(device, node)
9568
9569   logging.info("Pausing synchronization of disks of instance '%s'",
9570                instance.name)
9571   result = lu.rpc.call_blockdev_pause_resume_sync(node,
9572                                                   (map(compat.snd, disks),
9573                                                    instance),
9574                                                   True)
9575   result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9576
9577   for idx, success in enumerate(result.payload):
9578     if not success:
9579       logging.warn("Pausing synchronization of disk %s of instance '%s'"
9580                    " failed", idx, instance.name)
9581
9582   try:
9583     for (idx, device, offset) in disks:
9584       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9585       # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9586       wipe_chunk_size = \
9587         int(min(constants.MAX_WIPE_CHUNK,
9588                 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9589
9590       size = device.size
9591       last_output = 0
9592       start_time = time.time()
9593
9594       if offset == 0:
9595         info_text = ""
9596       else:
9597         info_text = (" (from %s to %s)" %
9598                      (utils.FormatUnit(offset, "h"),
9599                       utils.FormatUnit(size, "h")))
9600
9601       lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9602
9603       logging.info("Wiping disk %d for instance %s on node %s using"
9604                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9605
9606       while offset < size:
9607         wipe_size = min(wipe_chunk_size, size - offset)
9608
9609         logging.debug("Wiping disk %d, offset %s, chunk %s",
9610                       idx, offset, wipe_size)
9611
9612         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9613                                            wipe_size)
9614         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9615                      (idx, offset, wipe_size))
9616
9617         now = time.time()
9618         offset += wipe_size
9619         if now - last_output >= 60:
9620           eta = _CalcEta(now - start_time, offset, size)
9621           lu.LogInfo(" - done: %.1f%% ETA: %s",
9622                      offset / float(size) * 100, utils.FormatSeconds(eta))
9623           last_output = now
9624   finally:
9625     logging.info("Resuming synchronization of disks for instance '%s'",
9626                  instance.name)
9627
9628     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9629                                                     (map(compat.snd, disks),
9630                                                      instance),
9631                                                     False)
9632
9633     if result.fail_msg:
9634       lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9635                     node, result.fail_msg)
9636     else:
9637       for idx, success in enumerate(result.payload):
9638         if not success:
9639           lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9640                         " failed", idx, instance.name)
9641
9642
9643 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9644   """Create all disks for an instance.
9645
9646   This abstracts away some work from AddInstance.
9647
9648   @type lu: L{LogicalUnit}
9649   @param lu: the logical unit on whose behalf we execute
9650   @type instance: L{objects.Instance}
9651   @param instance: the instance whose disks we should create
9652   @type to_skip: list
9653   @param to_skip: list of indices to skip
9654   @type target_node: string
9655   @param target_node: if passed, overrides the target node for creation
9656   @rtype: boolean
9657   @return: the success of the creation
9658
9659   """
9660   info = _GetInstanceInfoText(instance)
9661   if target_node is None:
9662     pnode = instance.primary_node
9663     all_nodes = instance.all_nodes
9664   else:
9665     pnode = target_node
9666     all_nodes = [pnode]
9667
9668   if instance.disk_template in constants.DTS_FILEBASED:
9669     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9670     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9671
9672     result.Raise("Failed to create directory '%s' on"
9673                  " node %s" % (file_storage_dir, pnode))
9674
9675   # Note: this needs to be kept in sync with adding of disks in
9676   # LUInstanceSetParams
9677   for idx, device in enumerate(instance.disks):
9678     if to_skip and idx in to_skip:
9679       continue
9680     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9681     #HARDCODE
9682     for node in all_nodes:
9683       f_create = node == pnode
9684       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9685
9686
9687 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9688   """Remove all disks for an instance.
9689
9690   This abstracts away some work from `AddInstance()` and
9691   `RemoveInstance()`. Note that in case some of the devices couldn't
9692   be removed, the removal will continue with the other ones (compare
9693   with `_CreateDisks()`).
9694
9695   @type lu: L{LogicalUnit}
9696   @param lu: the logical unit on whose behalf we execute
9697   @type instance: L{objects.Instance}
9698   @param instance: the instance whose disks we should remove
9699   @type target_node: string
9700   @param target_node: used to override the node on which to remove the disks
9701   @rtype: boolean
9702   @return: the success of the removal
9703
9704   """
9705   logging.info("Removing block devices for instance %s", instance.name)
9706
9707   all_result = True
9708   ports_to_release = set()
9709   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9710   for (idx, device) in enumerate(anno_disks):
9711     if target_node:
9712       edata = [(target_node, device)]
9713     else:
9714       edata = device.ComputeNodeTree(instance.primary_node)
9715     for node, disk in edata:
9716       lu.cfg.SetDiskID(disk, node)
9717       result = lu.rpc.call_blockdev_remove(node, disk)
9718       if result.fail_msg:
9719         lu.LogWarning("Could not remove disk %s on node %s,"
9720                       " continuing anyway: %s", idx, node, result.fail_msg)
9721         if not (result.offline and node != instance.primary_node):
9722           all_result = False
9723
9724     # if this is a DRBD disk, return its port to the pool
9725     if device.dev_type in constants.LDS_DRBD:
9726       ports_to_release.add(device.logical_id[2])
9727
9728   if all_result or ignore_failures:
9729     for port in ports_to_release:
9730       lu.cfg.AddTcpUdpPort(port)
9731
9732   if instance.disk_template in constants.DTS_FILEBASED:
9733     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9734     if target_node:
9735       tgt = target_node
9736     else:
9737       tgt = instance.primary_node
9738     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9739     if result.fail_msg:
9740       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9741                     file_storage_dir, instance.primary_node, result.fail_msg)
9742       all_result = False
9743
9744   return all_result
9745
9746
9747 def _ComputeDiskSizePerVG(disk_template, disks):
9748   """Compute disk size requirements in the volume group
9749
9750   """
9751   def _compute(disks, payload):
9752     """Universal algorithm.
9753
9754     """
9755     vgs = {}
9756     for disk in disks:
9757       vgs[disk[constants.IDISK_VG]] = \
9758         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9759
9760     return vgs
9761
9762   # Required free disk space as a function of disk and swap space
9763   req_size_dict = {
9764     constants.DT_DISKLESS: {},
9765     constants.DT_PLAIN: _compute(disks, 0),
9766     # 128 MB are added for drbd metadata for each disk
9767     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9768     constants.DT_FILE: {},
9769     constants.DT_SHARED_FILE: {},
9770   }
9771
9772   if disk_template not in req_size_dict:
9773     raise errors.ProgrammerError("Disk template '%s' size requirement"
9774                                  " is unknown" % disk_template)
9775
9776   return req_size_dict[disk_template]
9777
9778
9779 def _FilterVmNodes(lu, nodenames):
9780   """Filters out non-vm_capable nodes from a list.
9781
9782   @type lu: L{LogicalUnit}
9783   @param lu: the logical unit for which we check
9784   @type nodenames: list
9785   @param nodenames: the list of nodes on which we should check
9786   @rtype: list
9787   @return: the list of vm-capable nodes
9788
9789   """
9790   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9791   return [name for name in nodenames if name not in vm_nodes]
9792
9793
9794 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9795   """Hypervisor parameter validation.
9796
9797   This function abstract the hypervisor parameter validation to be
9798   used in both instance create and instance modify.
9799
9800   @type lu: L{LogicalUnit}
9801   @param lu: the logical unit for which we check
9802   @type nodenames: list
9803   @param nodenames: the list of nodes on which we should check
9804   @type hvname: string
9805   @param hvname: the name of the hypervisor we should use
9806   @type hvparams: dict
9807   @param hvparams: the parameters which we need to check
9808   @raise errors.OpPrereqError: if the parameters are not valid
9809
9810   """
9811   nodenames = _FilterVmNodes(lu, nodenames)
9812
9813   cluster = lu.cfg.GetClusterInfo()
9814   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9815
9816   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9817   for node in nodenames:
9818     info = hvinfo[node]
9819     if info.offline:
9820       continue
9821     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9822
9823
9824 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9825   """OS parameters validation.
9826
9827   @type lu: L{LogicalUnit}
9828   @param lu: the logical unit for which we check
9829   @type required: boolean
9830   @param required: whether the validation should fail if the OS is not
9831       found
9832   @type nodenames: list
9833   @param nodenames: the list of nodes on which we should check
9834   @type osname: string
9835   @param osname: the name of the hypervisor we should use
9836   @type osparams: dict
9837   @param osparams: the parameters which we need to check
9838   @raise errors.OpPrereqError: if the parameters are not valid
9839
9840   """
9841   nodenames = _FilterVmNodes(lu, nodenames)
9842   result = lu.rpc.call_os_validate(nodenames, required, osname,
9843                                    [constants.OS_VALIDATE_PARAMETERS],
9844                                    osparams)
9845   for node, nres in result.items():
9846     # we don't check for offline cases since this should be run only
9847     # against the master node and/or an instance's nodes
9848     nres.Raise("OS Parameters validation failed on node %s" % node)
9849     if not nres.payload:
9850       lu.LogInfo("OS %s not found on node %s, validation skipped",
9851                  osname, node)
9852
9853
9854 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9855   """Wrapper around IAReqInstanceAlloc.
9856
9857   @param op: The instance opcode
9858   @param disks: The computed disks
9859   @param nics: The computed nics
9860   @param beparams: The full filled beparams
9861   @param node_whitelist: List of nodes which should appear as online to the
9862     allocator (unless the node is already marked offline)
9863
9864   @returns: A filled L{iallocator.IAReqInstanceAlloc}
9865
9866   """
9867   spindle_use = beparams[constants.BE_SPINDLE_USE]
9868   return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9869                                        disk_template=op.disk_template,
9870                                        tags=op.tags,
9871                                        os=op.os_type,
9872                                        vcpus=beparams[constants.BE_VCPUS],
9873                                        memory=beparams[constants.BE_MAXMEM],
9874                                        spindle_use=spindle_use,
9875                                        disks=disks,
9876                                        nics=[n.ToDict() for n in nics],
9877                                        hypervisor=op.hypervisor,
9878                                        node_whitelist=node_whitelist)
9879
9880
9881 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9882   """Computes the nics.
9883
9884   @param op: The instance opcode
9885   @param cluster: Cluster configuration object
9886   @param default_ip: The default ip to assign
9887   @param cfg: An instance of the configuration object
9888   @param ec_id: Execution context ID
9889
9890   @returns: The build up nics
9891
9892   """
9893   nics = []
9894   for nic in op.nics:
9895     nic_mode_req = nic.get(constants.INIC_MODE, None)
9896     nic_mode = nic_mode_req
9897     if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9898       nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9899
9900     net = nic.get(constants.INIC_NETWORK, None)
9901     link = nic.get(constants.NIC_LINK, None)
9902     ip = nic.get(constants.INIC_IP, None)
9903
9904     if net is None or net.lower() == constants.VALUE_NONE:
9905       net = None
9906     else:
9907       if nic_mode_req is not None or link is not None:
9908         raise errors.OpPrereqError("If network is given, no mode or link"
9909                                    " is allowed to be passed",
9910                                    errors.ECODE_INVAL)
9911
9912     # ip validity checks
9913     if ip is None or ip.lower() == constants.VALUE_NONE:
9914       nic_ip = None
9915     elif ip.lower() == constants.VALUE_AUTO:
9916       if not op.name_check:
9917         raise errors.OpPrereqError("IP address set to auto but name checks"
9918                                    " have been skipped",
9919                                    errors.ECODE_INVAL)
9920       nic_ip = default_ip
9921     else:
9922       # We defer pool operations until later, so that the iallocator has
9923       # filled in the instance's node(s) dimara
9924       if ip.lower() == constants.NIC_IP_POOL:
9925         if net is None:
9926           raise errors.OpPrereqError("if ip=pool, parameter network"
9927                                      " must be passed too",
9928                                      errors.ECODE_INVAL)
9929
9930       elif not netutils.IPAddress.IsValid(ip):
9931         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9932                                    errors.ECODE_INVAL)
9933
9934       nic_ip = ip
9935
9936     # TODO: check the ip address for uniqueness
9937     if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9938       raise errors.OpPrereqError("Routed nic mode requires an ip address",
9939                                  errors.ECODE_INVAL)
9940
9941     # MAC address verification
9942     mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9943     if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9944       mac = utils.NormalizeAndValidateMac(mac)
9945
9946       try:
9947         # TODO: We need to factor this out
9948         cfg.ReserveMAC(mac, ec_id)
9949       except errors.ReservationError:
9950         raise errors.OpPrereqError("MAC address %s already in use"
9951                                    " in cluster" % mac,
9952                                    errors.ECODE_NOTUNIQUE)
9953
9954     #  Build nic parameters
9955     nicparams = {}
9956     if nic_mode_req:
9957       nicparams[constants.NIC_MODE] = nic_mode
9958     if link:
9959       nicparams[constants.NIC_LINK] = link
9960
9961     check_params = cluster.SimpleFillNIC(nicparams)
9962     objects.NIC.CheckParameterSyntax(check_params)
9963     nics.append(objects.NIC(mac=mac, ip=nic_ip,
9964                             network=net, nicparams=nicparams))
9965
9966   return nics
9967
9968
9969 def _ComputeDisks(op, default_vg):
9970   """Computes the instance disks.
9971
9972   @param op: The instance opcode
9973   @param default_vg: The default_vg to assume
9974
9975   @return: The computed disks
9976
9977   """
9978   disks = []
9979   for disk in op.disks:
9980     mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9981     if mode not in constants.DISK_ACCESS_SET:
9982       raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9983                                  mode, errors.ECODE_INVAL)
9984     size = disk.get(constants.IDISK_SIZE, None)
9985     if size is None:
9986       raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9987     try:
9988       size = int(size)
9989     except (TypeError, ValueError):
9990       raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9991                                  errors.ECODE_INVAL)
9992
9993     ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9994     if ext_provider and op.disk_template != constants.DT_EXT:
9995       raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9996                                  " disk template, not %s" %
9997                                  (constants.IDISK_PROVIDER, constants.DT_EXT,
9998                                  op.disk_template), errors.ECODE_INVAL)
9999
10000     data_vg = disk.get(constants.IDISK_VG, default_vg)
10001     new_disk = {
10002       constants.IDISK_SIZE: size,
10003       constants.IDISK_MODE: mode,
10004       constants.IDISK_VG: data_vg,
10005       }
10006
10007     if constants.IDISK_METAVG in disk:
10008       new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10009     if constants.IDISK_ADOPT in disk:
10010       new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10011
10012     # For extstorage, demand the `provider' option and add any
10013     # additional parameters (ext-params) to the dict
10014     if op.disk_template == constants.DT_EXT:
10015       if ext_provider:
10016         new_disk[constants.IDISK_PROVIDER] = ext_provider
10017         for key in disk:
10018           if key not in constants.IDISK_PARAMS:
10019             new_disk[key] = disk[key]
10020       else:
10021         raise errors.OpPrereqError("Missing provider for template '%s'" %
10022                                    constants.DT_EXT, errors.ECODE_INVAL)
10023
10024     disks.append(new_disk)
10025
10026   return disks
10027
10028
10029 def _ComputeFullBeParams(op, cluster):
10030   """Computes the full beparams.
10031
10032   @param op: The instance opcode
10033   @param cluster: The cluster config object
10034
10035   @return: The fully filled beparams
10036
10037   """
10038   default_beparams = cluster.beparams[constants.PP_DEFAULT]
10039   for param, value in op.beparams.iteritems():
10040     if value == constants.VALUE_AUTO:
10041       op.beparams[param] = default_beparams[param]
10042   objects.UpgradeBeParams(op.beparams)
10043   utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10044   return cluster.SimpleFillBE(op.beparams)
10045
10046
10047 def _CheckOpportunisticLocking(op):
10048   """Generate error if opportunistic locking is not possible.
10049
10050   """
10051   if op.opportunistic_locking and not op.iallocator:
10052     raise errors.OpPrereqError("Opportunistic locking is only available in"
10053                                " combination with an instance allocator",
10054                                errors.ECODE_INVAL)
10055
10056
10057 class LUInstanceCreate(LogicalUnit):
10058   """Create an instance.
10059
10060   """
10061   HPATH = "instance-add"
10062   HTYPE = constants.HTYPE_INSTANCE
10063   REQ_BGL = False
10064
10065   def CheckArguments(self):
10066     """Check arguments.
10067
10068     """
10069     # do not require name_check to ease forward/backward compatibility
10070     # for tools
10071     if self.op.no_install and self.op.start:
10072       self.LogInfo("No-installation mode selected, disabling startup")
10073       self.op.start = False
10074     # validate/normalize the instance name
10075     self.op.instance_name = \
10076       netutils.Hostname.GetNormalizedName(self.op.instance_name)
10077
10078     if self.op.ip_check and not self.op.name_check:
10079       # TODO: make the ip check more flexible and not depend on the name check
10080       raise errors.OpPrereqError("Cannot do IP address check without a name"
10081                                  " check", errors.ECODE_INVAL)
10082
10083     # check nics' parameter names
10084     for nic in self.op.nics:
10085       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10086
10087     # check disks. parameter names and consistent adopt/no-adopt strategy
10088     has_adopt = has_no_adopt = False
10089     for disk in self.op.disks:
10090       if self.op.disk_template != constants.DT_EXT:
10091         utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10092       if constants.IDISK_ADOPT in disk:
10093         has_adopt = True
10094       else:
10095         has_no_adopt = True
10096     if has_adopt and has_no_adopt:
10097       raise errors.OpPrereqError("Either all disks are adopted or none is",
10098                                  errors.ECODE_INVAL)
10099     if has_adopt:
10100       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10101         raise errors.OpPrereqError("Disk adoption is not supported for the"
10102                                    " '%s' disk template" %
10103                                    self.op.disk_template,
10104                                    errors.ECODE_INVAL)
10105       if self.op.iallocator is not None:
10106         raise errors.OpPrereqError("Disk adoption not allowed with an"
10107                                    " iallocator script", errors.ECODE_INVAL)
10108       if self.op.mode == constants.INSTANCE_IMPORT:
10109         raise errors.OpPrereqError("Disk adoption not allowed for"
10110                                    " instance import", errors.ECODE_INVAL)
10111     else:
10112       if self.op.disk_template in constants.DTS_MUST_ADOPT:
10113         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10114                                    " but no 'adopt' parameter given" %
10115                                    self.op.disk_template,
10116                                    errors.ECODE_INVAL)
10117
10118     self.adopt_disks = has_adopt
10119
10120     # instance name verification
10121     if self.op.name_check:
10122       self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10123       self.op.instance_name = self.hostname1.name
10124       # used in CheckPrereq for ip ping check
10125       self.check_ip = self.hostname1.ip
10126     else:
10127       self.check_ip = None
10128
10129     # file storage checks
10130     if (self.op.file_driver and
10131         not self.op.file_driver in constants.FILE_DRIVER):
10132       raise errors.OpPrereqError("Invalid file driver name '%s'" %
10133                                  self.op.file_driver, errors.ECODE_INVAL)
10134
10135     if self.op.disk_template == constants.DT_FILE:
10136       opcodes.RequireFileStorage()
10137     elif self.op.disk_template == constants.DT_SHARED_FILE:
10138       opcodes.RequireSharedFileStorage()
10139
10140     ### Node/iallocator related checks
10141     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10142
10143     if self.op.pnode is not None:
10144       if self.op.disk_template in constants.DTS_INT_MIRROR:
10145         if self.op.snode is None:
10146           raise errors.OpPrereqError("The networked disk templates need"
10147                                      " a mirror node", errors.ECODE_INVAL)
10148       elif self.op.snode:
10149         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10150                         " template")
10151         self.op.snode = None
10152
10153     _CheckOpportunisticLocking(self.op)
10154
10155     self._cds = _GetClusterDomainSecret()
10156
10157     if self.op.mode == constants.INSTANCE_IMPORT:
10158       # On import force_variant must be True, because if we forced it at
10159       # initial install, our only chance when importing it back is that it
10160       # works again!
10161       self.op.force_variant = True
10162
10163       if self.op.no_install:
10164         self.LogInfo("No-installation mode has no effect during import")
10165
10166     elif self.op.mode == constants.INSTANCE_CREATE:
10167       if self.op.os_type is None:
10168         raise errors.OpPrereqError("No guest OS specified",
10169                                    errors.ECODE_INVAL)
10170       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10171         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10172                                    " installation" % self.op.os_type,
10173                                    errors.ECODE_STATE)
10174       if self.op.disk_template is None:
10175         raise errors.OpPrereqError("No disk template specified",
10176                                    errors.ECODE_INVAL)
10177
10178     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10179       # Check handshake to ensure both clusters have the same domain secret
10180       src_handshake = self.op.source_handshake
10181       if not src_handshake:
10182         raise errors.OpPrereqError("Missing source handshake",
10183                                    errors.ECODE_INVAL)
10184
10185       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10186                                                            src_handshake)
10187       if errmsg:
10188         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10189                                    errors.ECODE_INVAL)
10190
10191       # Load and check source CA
10192       self.source_x509_ca_pem = self.op.source_x509_ca
10193       if not self.source_x509_ca_pem:
10194         raise errors.OpPrereqError("Missing source X509 CA",
10195                                    errors.ECODE_INVAL)
10196
10197       try:
10198         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10199                                                     self._cds)
10200       except OpenSSL.crypto.Error, err:
10201         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10202                                    (err, ), errors.ECODE_INVAL)
10203
10204       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10205       if errcode is not None:
10206         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10207                                    errors.ECODE_INVAL)
10208
10209       self.source_x509_ca = cert
10210
10211       src_instance_name = self.op.source_instance_name
10212       if not src_instance_name:
10213         raise errors.OpPrereqError("Missing source instance name",
10214                                    errors.ECODE_INVAL)
10215
10216       self.source_instance_name = \
10217           netutils.GetHostname(name=src_instance_name).name
10218
10219     else:
10220       raise errors.OpPrereqError("Invalid instance creation mode %r" %
10221                                  self.op.mode, errors.ECODE_INVAL)
10222
10223   def ExpandNames(self):
10224     """ExpandNames for CreateInstance.
10225
10226     Figure out the right locks for instance creation.
10227
10228     """
10229     self.needed_locks = {}
10230
10231     instance_name = self.op.instance_name
10232     # this is just a preventive check, but someone might still add this
10233     # instance in the meantime, and creation will fail at lock-add time
10234     if instance_name in self.cfg.GetInstanceList():
10235       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10236                                  instance_name, errors.ECODE_EXISTS)
10237
10238     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10239
10240     if self.op.iallocator:
10241       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10242       # specifying a group on instance creation and then selecting nodes from
10243       # that group
10244       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10245       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10246
10247       if self.op.opportunistic_locking:
10248         self.opportunistic_locks[locking.LEVEL_NODE] = True
10249         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10250     else:
10251       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10252       nodelist = [self.op.pnode]
10253       if self.op.snode is not None:
10254         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10255         nodelist.append(self.op.snode)
10256       self.needed_locks[locking.LEVEL_NODE] = nodelist
10257
10258     # in case of import lock the source node too
10259     if self.op.mode == constants.INSTANCE_IMPORT:
10260       src_node = self.op.src_node
10261       src_path = self.op.src_path
10262
10263       if src_path is None:
10264         self.op.src_path = src_path = self.op.instance_name
10265
10266       if src_node is None:
10267         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10268         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10269         self.op.src_node = None
10270         if os.path.isabs(src_path):
10271           raise errors.OpPrereqError("Importing an instance from a path"
10272                                      " requires a source node option",
10273                                      errors.ECODE_INVAL)
10274       else:
10275         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10276         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10277           self.needed_locks[locking.LEVEL_NODE].append(src_node)
10278         if not os.path.isabs(src_path):
10279           self.op.src_path = src_path = \
10280             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10281
10282     self.needed_locks[locking.LEVEL_NODE_RES] = \
10283       _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10284
10285   def _RunAllocator(self):
10286     """Run the allocator based on input opcode.
10287
10288     """
10289     if self.op.opportunistic_locking:
10290       # Only consider nodes for which a lock is held
10291       node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10292     else:
10293       node_whitelist = None
10294
10295     #TODO Export network to iallocator so that it chooses a pnode
10296     #     in a nodegroup that has the desired network connected to
10297     req = _CreateInstanceAllocRequest(self.op, self.disks,
10298                                       self.nics, self.be_full,
10299                                       node_whitelist)
10300     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10301
10302     ial.Run(self.op.iallocator)
10303
10304     if not ial.success:
10305       # When opportunistic locks are used only a temporary failure is generated
10306       if self.op.opportunistic_locking:
10307         ecode = errors.ECODE_TEMP_NORES
10308       else:
10309         ecode = errors.ECODE_NORES
10310
10311       raise errors.OpPrereqError("Can't compute nodes using"
10312                                  " iallocator '%s': %s" %
10313                                  (self.op.iallocator, ial.info),
10314                                  ecode)
10315
10316     self.op.pnode = ial.result[0]
10317     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10318                  self.op.instance_name, self.op.iallocator,
10319                  utils.CommaJoin(ial.result))
10320
10321     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10322
10323     if req.RequiredNodes() == 2:
10324       self.op.snode = ial.result[1]
10325
10326   def BuildHooksEnv(self):
10327     """Build hooks env.
10328
10329     This runs on master, primary and secondary nodes of the instance.
10330
10331     """
10332     env = {
10333       "ADD_MODE": self.op.mode,
10334       }
10335     if self.op.mode == constants.INSTANCE_IMPORT:
10336       env["SRC_NODE"] = self.op.src_node
10337       env["SRC_PATH"] = self.op.src_path
10338       env["SRC_IMAGES"] = self.src_images
10339
10340     env.update(_BuildInstanceHookEnv(
10341       name=self.op.instance_name,
10342       primary_node=self.op.pnode,
10343       secondary_nodes=self.secondaries,
10344       status=self.op.start,
10345       os_type=self.op.os_type,
10346       minmem=self.be_full[constants.BE_MINMEM],
10347       maxmem=self.be_full[constants.BE_MAXMEM],
10348       vcpus=self.be_full[constants.BE_VCPUS],
10349       nics=_NICListToTuple(self, self.nics),
10350       disk_template=self.op.disk_template,
10351       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10352              for d in self.disks],
10353       bep=self.be_full,
10354       hvp=self.hv_full,
10355       hypervisor_name=self.op.hypervisor,
10356       tags=self.op.tags,
10357     ))
10358
10359     return env
10360
10361   def BuildHooksNodes(self):
10362     """Build hooks nodes.
10363
10364     """
10365     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10366     return nl, nl
10367
10368   def _ReadExportInfo(self):
10369     """Reads the export information from disk.
10370
10371     It will override the opcode source node and path with the actual
10372     information, if these two were not specified before.
10373
10374     @return: the export information
10375
10376     """
10377     assert self.op.mode == constants.INSTANCE_IMPORT
10378
10379     src_node = self.op.src_node
10380     src_path = self.op.src_path
10381
10382     if src_node is None:
10383       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10384       exp_list = self.rpc.call_export_list(locked_nodes)
10385       found = False
10386       for node in exp_list:
10387         if exp_list[node].fail_msg:
10388           continue
10389         if src_path in exp_list[node].payload:
10390           found = True
10391           self.op.src_node = src_node = node
10392           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10393                                                        src_path)
10394           break
10395       if not found:
10396         raise errors.OpPrereqError("No export found for relative path %s" %
10397                                     src_path, errors.ECODE_INVAL)
10398
10399     _CheckNodeOnline(self, src_node)
10400     result = self.rpc.call_export_info(src_node, src_path)
10401     result.Raise("No export or invalid export found in dir %s" % src_path)
10402
10403     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10404     if not export_info.has_section(constants.INISECT_EXP):
10405       raise errors.ProgrammerError("Corrupted export config",
10406                                    errors.ECODE_ENVIRON)
10407
10408     ei_version = export_info.get(constants.INISECT_EXP, "version")
10409     if (int(ei_version) != constants.EXPORT_VERSION):
10410       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10411                                  (ei_version, constants.EXPORT_VERSION),
10412                                  errors.ECODE_ENVIRON)
10413     return export_info
10414
10415   def _ReadExportParams(self, einfo):
10416     """Use export parameters as defaults.
10417
10418     In case the opcode doesn't specify (as in override) some instance
10419     parameters, then try to use them from the export information, if
10420     that declares them.
10421
10422     """
10423     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10424
10425     if self.op.disk_template is None:
10426       if einfo.has_option(constants.INISECT_INS, "disk_template"):
10427         self.op.disk_template = einfo.get(constants.INISECT_INS,
10428                                           "disk_template")
10429         if self.op.disk_template not in constants.DISK_TEMPLATES:
10430           raise errors.OpPrereqError("Disk template specified in configuration"
10431                                      " file is not one of the allowed values:"
10432                                      " %s" %
10433                                      " ".join(constants.DISK_TEMPLATES),
10434                                      errors.ECODE_INVAL)
10435       else:
10436         raise errors.OpPrereqError("No disk template specified and the export"
10437                                    " is missing the disk_template information",
10438                                    errors.ECODE_INVAL)
10439
10440     if not self.op.disks:
10441       disks = []
10442       # TODO: import the disk iv_name too
10443       for idx in range(constants.MAX_DISKS):
10444         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10445           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10446           disks.append({constants.IDISK_SIZE: disk_sz})
10447       self.op.disks = disks
10448       if not disks and self.op.disk_template != constants.DT_DISKLESS:
10449         raise errors.OpPrereqError("No disk info specified and the export"
10450                                    " is missing the disk information",
10451                                    errors.ECODE_INVAL)
10452
10453     if not self.op.nics:
10454       nics = []
10455       for idx in range(constants.MAX_NICS):
10456         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10457           ndict = {}
10458           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10459             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10460             ndict[name] = v
10461           nics.append(ndict)
10462         else:
10463           break
10464       self.op.nics = nics
10465
10466     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10467       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10468
10469     if (self.op.hypervisor is None and
10470         einfo.has_option(constants.INISECT_INS, "hypervisor")):
10471       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10472
10473     if einfo.has_section(constants.INISECT_HYP):
10474       # use the export parameters but do not override the ones
10475       # specified by the user
10476       for name, value in einfo.items(constants.INISECT_HYP):
10477         if name not in self.op.hvparams:
10478           self.op.hvparams[name] = value
10479
10480     if einfo.has_section(constants.INISECT_BEP):
10481       # use the parameters, without overriding
10482       for name, value in einfo.items(constants.INISECT_BEP):
10483         if name not in self.op.beparams:
10484           self.op.beparams[name] = value
10485         # Compatibility for the old "memory" be param
10486         if name == constants.BE_MEMORY:
10487           if constants.BE_MAXMEM not in self.op.beparams:
10488             self.op.beparams[constants.BE_MAXMEM] = value
10489           if constants.BE_MINMEM not in self.op.beparams:
10490             self.op.beparams[constants.BE_MINMEM] = value
10491     else:
10492       # try to read the parameters old style, from the main section
10493       for name in constants.BES_PARAMETERS:
10494         if (name not in self.op.beparams and
10495             einfo.has_option(constants.INISECT_INS, name)):
10496           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10497
10498     if einfo.has_section(constants.INISECT_OSP):
10499       # use the parameters, without overriding
10500       for name, value in einfo.items(constants.INISECT_OSP):
10501         if name not in self.op.osparams:
10502           self.op.osparams[name] = value
10503
10504   def _RevertToDefaults(self, cluster):
10505     """Revert the instance parameters to the default values.
10506
10507     """
10508     # hvparams
10509     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10510     for name in self.op.hvparams.keys():
10511       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10512         del self.op.hvparams[name]
10513     # beparams
10514     be_defs = cluster.SimpleFillBE({})
10515     for name in self.op.beparams.keys():
10516       if name in be_defs and be_defs[name] == self.op.beparams[name]:
10517         del self.op.beparams[name]
10518     # nic params
10519     nic_defs = cluster.SimpleFillNIC({})
10520     for nic in self.op.nics:
10521       for name in constants.NICS_PARAMETERS:
10522         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10523           del nic[name]
10524     # osparams
10525     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10526     for name in self.op.osparams.keys():
10527       if name in os_defs and os_defs[name] == self.op.osparams[name]:
10528         del self.op.osparams[name]
10529
10530   def _CalculateFileStorageDir(self):
10531     """Calculate final instance file storage dir.
10532
10533     """
10534     # file storage dir calculation/check
10535     self.instance_file_storage_dir = None
10536     if self.op.disk_template in constants.DTS_FILEBASED:
10537       # build the full file storage dir path
10538       joinargs = []
10539
10540       if self.op.disk_template == constants.DT_SHARED_FILE:
10541         get_fsd_fn = self.cfg.GetSharedFileStorageDir
10542       else:
10543         get_fsd_fn = self.cfg.GetFileStorageDir
10544
10545       cfg_storagedir = get_fsd_fn()
10546       if not cfg_storagedir:
10547         raise errors.OpPrereqError("Cluster file storage dir not defined",
10548                                    errors.ECODE_STATE)
10549       joinargs.append(cfg_storagedir)
10550
10551       if self.op.file_storage_dir is not None:
10552         joinargs.append(self.op.file_storage_dir)
10553
10554       joinargs.append(self.op.instance_name)
10555
10556       # pylint: disable=W0142
10557       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10558
10559   def CheckPrereq(self): # pylint: disable=R0914
10560     """Check prerequisites.
10561
10562     """
10563     self._CalculateFileStorageDir()
10564
10565     if self.op.mode == constants.INSTANCE_IMPORT:
10566       export_info = self._ReadExportInfo()
10567       self._ReadExportParams(export_info)
10568       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10569     else:
10570       self._old_instance_name = None
10571
10572     if (not self.cfg.GetVGName() and
10573         self.op.disk_template not in constants.DTS_NOT_LVM):
10574       raise errors.OpPrereqError("Cluster does not support lvm-based"
10575                                  " instances", errors.ECODE_STATE)
10576
10577     if (self.op.hypervisor is None or
10578         self.op.hypervisor == constants.VALUE_AUTO):
10579       self.op.hypervisor = self.cfg.GetHypervisorType()
10580
10581     cluster = self.cfg.GetClusterInfo()
10582     enabled_hvs = cluster.enabled_hypervisors
10583     if self.op.hypervisor not in enabled_hvs:
10584       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10585                                  " cluster (%s)" %
10586                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10587                                  errors.ECODE_STATE)
10588
10589     # Check tag validity
10590     for tag in self.op.tags:
10591       objects.TaggableObject.ValidateTag(tag)
10592
10593     # check hypervisor parameter syntax (locally)
10594     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10595     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10596                                       self.op.hvparams)
10597     hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10598     hv_type.CheckParameterSyntax(filled_hvp)
10599     self.hv_full = filled_hvp
10600     # check that we don't specify global parameters on an instance
10601     _CheckGlobalHvParams(self.op.hvparams)
10602
10603     # fill and remember the beparams dict
10604     self.be_full = _ComputeFullBeParams(self.op, cluster)
10605
10606     # build os parameters
10607     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10608
10609     # now that hvp/bep are in final format, let's reset to defaults,
10610     # if told to do so
10611     if self.op.identify_defaults:
10612       self._RevertToDefaults(cluster)
10613
10614     # NIC buildup
10615     self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10616                              self.proc.GetECId())
10617
10618     # disk checks/pre-build
10619     default_vg = self.cfg.GetVGName()
10620     self.disks = _ComputeDisks(self.op, default_vg)
10621
10622     if self.op.mode == constants.INSTANCE_IMPORT:
10623       disk_images = []
10624       for idx in range(len(self.disks)):
10625         option = "disk%d_dump" % idx
10626         if export_info.has_option(constants.INISECT_INS, option):
10627           # FIXME: are the old os-es, disk sizes, etc. useful?
10628           export_name = export_info.get(constants.INISECT_INS, option)
10629           image = utils.PathJoin(self.op.src_path, export_name)
10630           disk_images.append(image)
10631         else:
10632           disk_images.append(False)
10633
10634       self.src_images = disk_images
10635
10636       if self.op.instance_name == self._old_instance_name:
10637         for idx, nic in enumerate(self.nics):
10638           if nic.mac == constants.VALUE_AUTO:
10639             nic_mac_ini = "nic%d_mac" % idx
10640             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10641
10642     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10643
10644     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10645     if self.op.ip_check:
10646       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10647         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10648                                    (self.check_ip, self.op.instance_name),
10649                                    errors.ECODE_NOTUNIQUE)
10650
10651     #### mac address generation
10652     # By generating here the mac address both the allocator and the hooks get
10653     # the real final mac address rather than the 'auto' or 'generate' value.
10654     # There is a race condition between the generation and the instance object
10655     # creation, which means that we know the mac is valid now, but we're not
10656     # sure it will be when we actually add the instance. If things go bad
10657     # adding the instance will abort because of a duplicate mac, and the
10658     # creation job will fail.
10659     for nic in self.nics:
10660       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10661         nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10662
10663     #### allocator run
10664
10665     if self.op.iallocator is not None:
10666       self._RunAllocator()
10667
10668     # Release all unneeded node locks
10669     keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10670     _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10671     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10672     _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10673
10674     assert (self.owned_locks(locking.LEVEL_NODE) ==
10675             self.owned_locks(locking.LEVEL_NODE_RES)), \
10676       "Node locks differ from node resource locks"
10677
10678     #### node related checks
10679
10680     # check primary node
10681     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10682     assert self.pnode is not None, \
10683       "Cannot retrieve locked node %s" % self.op.pnode
10684     if pnode.offline:
10685       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10686                                  pnode.name, errors.ECODE_STATE)
10687     if pnode.drained:
10688       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10689                                  pnode.name, errors.ECODE_STATE)
10690     if not pnode.vm_capable:
10691       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10692                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10693
10694     self.secondaries = []
10695
10696     # Fill in any IPs from IP pools. This must happen here, because we need to
10697     # know the nic's primary node, as specified by the iallocator
10698     for idx, nic in enumerate(self.nics):
10699       net = nic.network
10700       if net is not None:
10701         netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10702         if netparams is None:
10703           raise errors.OpPrereqError("No netparams found for network"
10704                                      " %s. Propably not connected to"
10705                                      " node's %s nodegroup" %
10706                                      (net, self.pnode.name),
10707                                      errors.ECODE_INVAL)
10708         self.LogInfo("NIC/%d inherits netparams %s" %
10709                      (idx, netparams.values()))
10710         nic.nicparams = dict(netparams)
10711         if nic.ip is not None:
10712           if nic.ip.lower() == constants.NIC_IP_POOL:
10713             try:
10714               nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10715             except errors.ReservationError:
10716               raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10717                                          " from the address pool" % idx,
10718                                          errors.ECODE_STATE)
10719             self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10720           else:
10721             try:
10722               self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10723             except errors.ReservationError:
10724               raise errors.OpPrereqError("IP address %s already in use"
10725                                          " or does not belong to network %s" %
10726                                          (nic.ip, net),
10727                                          errors.ECODE_NOTUNIQUE)
10728
10729       # net is None, ip None or given
10730       elif self.op.conflicts_check:
10731         _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10732
10733     # mirror node verification
10734     if self.op.disk_template in constants.DTS_INT_MIRROR:
10735       if self.op.snode == pnode.name:
10736         raise errors.OpPrereqError("The secondary node cannot be the"
10737                                    " primary node", errors.ECODE_INVAL)
10738       _CheckNodeOnline(self, self.op.snode)
10739       _CheckNodeNotDrained(self, self.op.snode)
10740       _CheckNodeVmCapable(self, self.op.snode)
10741       self.secondaries.append(self.op.snode)
10742
10743       snode = self.cfg.GetNodeInfo(self.op.snode)
10744       if pnode.group != snode.group:
10745         self.LogWarning("The primary and secondary nodes are in two"
10746                         " different node groups; the disk parameters"
10747                         " from the first disk's node group will be"
10748                         " used")
10749
10750     if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10751       nodes = [pnode]
10752       if self.op.disk_template in constants.DTS_INT_MIRROR:
10753         nodes.append(snode)
10754       has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10755       if compat.any(map(has_es, nodes)):
10756         raise errors.OpPrereqError("Disk template %s not supported with"
10757                                    " exclusive storage" % self.op.disk_template,
10758                                    errors.ECODE_STATE)
10759
10760     nodenames = [pnode.name] + self.secondaries
10761
10762     # Verify instance specs
10763     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10764     ispec = {
10765       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10766       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10767       constants.ISPEC_DISK_COUNT: len(self.disks),
10768       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10769       constants.ISPEC_NIC_COUNT: len(self.nics),
10770       constants.ISPEC_SPINDLE_USE: spindle_use,
10771       }
10772
10773     group_info = self.cfg.GetNodeGroup(pnode.group)
10774     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10775     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10776     if not self.op.ignore_ipolicy and res:
10777       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10778              (pnode.group, group_info.name, utils.CommaJoin(res)))
10779       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10780
10781     if not self.adopt_disks:
10782       if self.op.disk_template == constants.DT_RBD:
10783         # _CheckRADOSFreeSpace() is just a placeholder.
10784         # Any function that checks prerequisites can be placed here.
10785         # Check if there is enough space on the RADOS cluster.
10786         _CheckRADOSFreeSpace()
10787       elif self.op.disk_template == constants.DT_EXT:
10788         # FIXME: Function that checks prereqs if needed
10789         pass
10790       else:
10791         # Check lv size requirements, if not adopting
10792         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10793         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10794
10795     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10796       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10797                                 disk[constants.IDISK_ADOPT])
10798                      for disk in self.disks])
10799       if len(all_lvs) != len(self.disks):
10800         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10801                                    errors.ECODE_INVAL)
10802       for lv_name in all_lvs:
10803         try:
10804           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10805           # to ReserveLV uses the same syntax
10806           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10807         except errors.ReservationError:
10808           raise errors.OpPrereqError("LV named %s used by another instance" %
10809                                      lv_name, errors.ECODE_NOTUNIQUE)
10810
10811       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10812       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10813
10814       node_lvs = self.rpc.call_lv_list([pnode.name],
10815                                        vg_names.payload.keys())[pnode.name]
10816       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10817       node_lvs = node_lvs.payload
10818
10819       delta = all_lvs.difference(node_lvs.keys())
10820       if delta:
10821         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10822                                    utils.CommaJoin(delta),
10823                                    errors.ECODE_INVAL)
10824       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10825       if online_lvs:
10826         raise errors.OpPrereqError("Online logical volumes found, cannot"
10827                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10828                                    errors.ECODE_STATE)
10829       # update the size of disk based on what is found
10830       for dsk in self.disks:
10831         dsk[constants.IDISK_SIZE] = \
10832           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10833                                         dsk[constants.IDISK_ADOPT])][0]))
10834
10835     elif self.op.disk_template == constants.DT_BLOCK:
10836       # Normalize and de-duplicate device paths
10837       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10838                        for disk in self.disks])
10839       if len(all_disks) != len(self.disks):
10840         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10841                                    errors.ECODE_INVAL)
10842       baddisks = [d for d in all_disks
10843                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10844       if baddisks:
10845         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10846                                    " cannot be adopted" %
10847                                    (utils.CommaJoin(baddisks),
10848                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10849                                    errors.ECODE_INVAL)
10850
10851       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10852                                             list(all_disks))[pnode.name]
10853       node_disks.Raise("Cannot get block device information from node %s" %
10854                        pnode.name)
10855       node_disks = node_disks.payload
10856       delta = all_disks.difference(node_disks.keys())
10857       if delta:
10858         raise errors.OpPrereqError("Missing block device(s): %s" %
10859                                    utils.CommaJoin(delta),
10860                                    errors.ECODE_INVAL)
10861       for dsk in self.disks:
10862         dsk[constants.IDISK_SIZE] = \
10863           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10864
10865     # Verify instance specs
10866     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10867     ispec = {
10868       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10869       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10870       constants.ISPEC_DISK_COUNT: len(self.disks),
10871       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10872                                   for disk in self.disks],
10873       constants.ISPEC_NIC_COUNT: len(self.nics),
10874       constants.ISPEC_SPINDLE_USE: spindle_use,
10875       }
10876
10877     group_info = self.cfg.GetNodeGroup(pnode.group)
10878     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10879     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10880     if not self.op.ignore_ipolicy and res:
10881       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10882                                   " policy: %s") % (pnode.group,
10883                                                     utils.CommaJoin(res)),
10884                                   errors.ECODE_INVAL)
10885
10886     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10887
10888     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10889     # check OS parameters (remotely)
10890     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10891
10892     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10893
10894     #TODO: _CheckExtParams (remotely)
10895     # Check parameters for extstorage
10896
10897     # memory check on primary node
10898     #TODO(dynmem): use MINMEM for checking
10899     if self.op.start:
10900       _CheckNodeFreeMemory(self, self.pnode.name,
10901                            "creating instance %s" % self.op.instance_name,
10902                            self.be_full[constants.BE_MAXMEM],
10903                            self.op.hypervisor)
10904
10905     self.dry_run_result = list(nodenames)
10906
10907   def Exec(self, feedback_fn):
10908     """Create and add the instance to the cluster.
10909
10910     """
10911     instance = self.op.instance_name
10912     pnode_name = self.pnode.name
10913
10914     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10915                 self.owned_locks(locking.LEVEL_NODE)), \
10916       "Node locks differ from node resource locks"
10917     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10918
10919     ht_kind = self.op.hypervisor
10920     if ht_kind in constants.HTS_REQ_PORT:
10921       network_port = self.cfg.AllocatePort()
10922     else:
10923       network_port = None
10924
10925     # This is ugly but we got a chicken-egg problem here
10926     # We can only take the group disk parameters, as the instance
10927     # has no disks yet (we are generating them right here).
10928     node = self.cfg.GetNodeInfo(pnode_name)
10929     nodegroup = self.cfg.GetNodeGroup(node.group)
10930     disks = _GenerateDiskTemplate(self,
10931                                   self.op.disk_template,
10932                                   instance, pnode_name,
10933                                   self.secondaries,
10934                                   self.disks,
10935                                   self.instance_file_storage_dir,
10936                                   self.op.file_driver,
10937                                   0,
10938                                   feedback_fn,
10939                                   self.cfg.GetGroupDiskParams(nodegroup))
10940
10941     iobj = objects.Instance(name=instance, os=self.op.os_type,
10942                             primary_node=pnode_name,
10943                             nics=self.nics, disks=disks,
10944                             disk_template=self.op.disk_template,
10945                             admin_state=constants.ADMINST_DOWN,
10946                             network_port=network_port,
10947                             beparams=self.op.beparams,
10948                             hvparams=self.op.hvparams,
10949                             hypervisor=self.op.hypervisor,
10950                             osparams=self.op.osparams,
10951                             )
10952
10953     if self.op.tags:
10954       for tag in self.op.tags:
10955         iobj.AddTag(tag)
10956
10957     if self.adopt_disks:
10958       if self.op.disk_template == constants.DT_PLAIN:
10959         # rename LVs to the newly-generated names; we need to construct
10960         # 'fake' LV disks with the old data, plus the new unique_id
10961         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10962         rename_to = []
10963         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10964           rename_to.append(t_dsk.logical_id)
10965           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10966           self.cfg.SetDiskID(t_dsk, pnode_name)
10967         result = self.rpc.call_blockdev_rename(pnode_name,
10968                                                zip(tmp_disks, rename_to))
10969         result.Raise("Failed to rename adoped LVs")
10970     else:
10971       feedback_fn("* creating instance disks...")
10972       try:
10973         _CreateDisks(self, iobj)
10974       except errors.OpExecError:
10975         self.LogWarning("Device creation failed, reverting...")
10976         try:
10977           _RemoveDisks(self, iobj)
10978         finally:
10979           self.cfg.ReleaseDRBDMinors(instance)
10980           raise
10981
10982     feedback_fn("adding instance %s to cluster config" % instance)
10983
10984     self.cfg.AddInstance(iobj, self.proc.GetECId())
10985
10986     # Declare that we don't want to remove the instance lock anymore, as we've
10987     # added the instance to the config
10988     del self.remove_locks[locking.LEVEL_INSTANCE]
10989
10990     if self.op.mode == constants.INSTANCE_IMPORT:
10991       # Release unused nodes
10992       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10993     else:
10994       # Release all nodes
10995       _ReleaseLocks(self, locking.LEVEL_NODE)
10996
10997     disk_abort = False
10998     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10999       feedback_fn("* wiping instance disks...")
11000       try:
11001         _WipeDisks(self, iobj)
11002       except errors.OpExecError, err:
11003         logging.exception("Wiping disks failed")
11004         self.LogWarning("Wiping instance disks failed (%s)", err)
11005         disk_abort = True
11006
11007     if disk_abort:
11008       # Something is already wrong with the disks, don't do anything else
11009       pass
11010     elif self.op.wait_for_sync:
11011       disk_abort = not _WaitForSync(self, iobj)
11012     elif iobj.disk_template in constants.DTS_INT_MIRROR:
11013       # make sure the disks are not degraded (still sync-ing is ok)
11014       feedback_fn("* checking mirrors status")
11015       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11016     else:
11017       disk_abort = False
11018
11019     if disk_abort:
11020       _RemoveDisks(self, iobj)
11021       self.cfg.RemoveInstance(iobj.name)
11022       # Make sure the instance lock gets removed
11023       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11024       raise errors.OpExecError("There are some degraded disks for"
11025                                " this instance")
11026
11027     # Release all node resource locks
11028     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11029
11030     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11031       # we need to set the disks ID to the primary node, since the
11032       # preceding code might or might have not done it, depending on
11033       # disk template and other options
11034       for disk in iobj.disks:
11035         self.cfg.SetDiskID(disk, pnode_name)
11036       if self.op.mode == constants.INSTANCE_CREATE:
11037         if not self.op.no_install:
11038           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11039                         not self.op.wait_for_sync)
11040           if pause_sync:
11041             feedback_fn("* pausing disk sync to install instance OS")
11042             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11043                                                               (iobj.disks,
11044                                                                iobj), True)
11045             for idx, success in enumerate(result.payload):
11046               if not success:
11047                 logging.warn("pause-sync of instance %s for disk %d failed",
11048                              instance, idx)
11049
11050           feedback_fn("* running the instance OS create scripts...")
11051           # FIXME: pass debug option from opcode to backend
11052           os_add_result = \
11053             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11054                                           self.op.debug_level)
11055           if pause_sync:
11056             feedback_fn("* resuming disk sync")
11057             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11058                                                               (iobj.disks,
11059                                                                iobj), False)
11060             for idx, success in enumerate(result.payload):
11061               if not success:
11062                 logging.warn("resume-sync of instance %s for disk %d failed",
11063                              instance, idx)
11064
11065           os_add_result.Raise("Could not add os for instance %s"
11066                               " on node %s" % (instance, pnode_name))
11067
11068       else:
11069         if self.op.mode == constants.INSTANCE_IMPORT:
11070           feedback_fn("* running the instance OS import scripts...")
11071
11072           transfers = []
11073
11074           for idx, image in enumerate(self.src_images):
11075             if not image:
11076               continue
11077
11078             # FIXME: pass debug option from opcode to backend
11079             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11080                                                constants.IEIO_FILE, (image, ),
11081                                                constants.IEIO_SCRIPT,
11082                                                (iobj.disks[idx], idx),
11083                                                None)
11084             transfers.append(dt)
11085
11086           import_result = \
11087             masterd.instance.TransferInstanceData(self, feedback_fn,
11088                                                   self.op.src_node, pnode_name,
11089                                                   self.pnode.secondary_ip,
11090                                                   iobj, transfers)
11091           if not compat.all(import_result):
11092             self.LogWarning("Some disks for instance %s on node %s were not"
11093                             " imported successfully" % (instance, pnode_name))
11094
11095           rename_from = self._old_instance_name
11096
11097         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11098           feedback_fn("* preparing remote import...")
11099           # The source cluster will stop the instance before attempting to make
11100           # a connection. In some cases stopping an instance can take a long
11101           # time, hence the shutdown timeout is added to the connection
11102           # timeout.
11103           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11104                              self.op.source_shutdown_timeout)
11105           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11106
11107           assert iobj.primary_node == self.pnode.name
11108           disk_results = \
11109             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11110                                           self.source_x509_ca,
11111                                           self._cds, timeouts)
11112           if not compat.all(disk_results):
11113             # TODO: Should the instance still be started, even if some disks
11114             # failed to import (valid for local imports, too)?
11115             self.LogWarning("Some disks for instance %s on node %s were not"
11116                             " imported successfully" % (instance, pnode_name))
11117
11118           rename_from = self.source_instance_name
11119
11120         else:
11121           # also checked in the prereq part
11122           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11123                                        % self.op.mode)
11124
11125         # Run rename script on newly imported instance
11126         assert iobj.name == instance
11127         feedback_fn("Running rename script for %s" % instance)
11128         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11129                                                    rename_from,
11130                                                    self.op.debug_level)
11131         if result.fail_msg:
11132           self.LogWarning("Failed to run rename script for %s on node"
11133                           " %s: %s" % (instance, pnode_name, result.fail_msg))
11134
11135     assert not self.owned_locks(locking.LEVEL_NODE_RES)
11136
11137     if self.op.start:
11138       iobj.admin_state = constants.ADMINST_UP
11139       self.cfg.Update(iobj, feedback_fn)
11140       logging.info("Starting instance %s on node %s", instance, pnode_name)
11141       feedback_fn("* starting instance...")
11142       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11143                                             False)
11144       result.Raise("Could not start instance")
11145
11146     return list(iobj.all_nodes)
11147
11148
11149 class LUInstanceMultiAlloc(NoHooksLU):
11150   """Allocates multiple instances at the same time.
11151
11152   """
11153   REQ_BGL = False
11154
11155   def CheckArguments(self):
11156     """Check arguments.
11157
11158     """
11159     nodes = []
11160     for inst in self.op.instances:
11161       if inst.iallocator is not None:
11162         raise errors.OpPrereqError("iallocator are not allowed to be set on"
11163                                    " instance objects", errors.ECODE_INVAL)
11164       nodes.append(bool(inst.pnode))
11165       if inst.disk_template in constants.DTS_INT_MIRROR:
11166         nodes.append(bool(inst.snode))
11167
11168     has_nodes = compat.any(nodes)
11169     if compat.all(nodes) ^ has_nodes:
11170       raise errors.OpPrereqError("There are instance objects providing"
11171                                  " pnode/snode while others do not",
11172                                  errors.ECODE_INVAL)
11173
11174     if self.op.iallocator is None:
11175       default_iallocator = self.cfg.GetDefaultIAllocator()
11176       if default_iallocator and has_nodes:
11177         self.op.iallocator = default_iallocator
11178       else:
11179         raise errors.OpPrereqError("No iallocator or nodes on the instances"
11180                                    " given and no cluster-wide default"
11181                                    " iallocator found; please specify either"
11182                                    " an iallocator or nodes on the instances"
11183                                    " or set a cluster-wide default iallocator",
11184                                    errors.ECODE_INVAL)
11185
11186     _CheckOpportunisticLocking(self.op)
11187
11188     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11189     if dups:
11190       raise errors.OpPrereqError("There are duplicate instance names: %s" %
11191                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
11192
11193   def ExpandNames(self):
11194     """Calculate the locks.
11195
11196     """
11197     self.share_locks = _ShareAll()
11198     self.needed_locks = {
11199       # iallocator will select nodes and even if no iallocator is used,
11200       # collisions with LUInstanceCreate should be avoided
11201       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11202       }
11203
11204     if self.op.iallocator:
11205       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11206       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11207
11208       if self.op.opportunistic_locking:
11209         self.opportunistic_locks[locking.LEVEL_NODE] = True
11210         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11211     else:
11212       nodeslist = []
11213       for inst in self.op.instances:
11214         inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11215         nodeslist.append(inst.pnode)
11216         if inst.snode is not None:
11217           inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11218           nodeslist.append(inst.snode)
11219
11220       self.needed_locks[locking.LEVEL_NODE] = nodeslist
11221       # Lock resources of instance's primary and secondary nodes (copy to
11222       # prevent accidential modification)
11223       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11224
11225   def CheckPrereq(self):
11226     """Check prerequisite.
11227
11228     """
11229     cluster = self.cfg.GetClusterInfo()
11230     default_vg = self.cfg.GetVGName()
11231     ec_id = self.proc.GetECId()
11232
11233     if self.op.opportunistic_locking:
11234       # Only consider nodes for which a lock is held
11235       node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11236     else:
11237       node_whitelist = None
11238
11239     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11240                                          _ComputeNics(op, cluster, None,
11241                                                       self.cfg, ec_id),
11242                                          _ComputeFullBeParams(op, cluster),
11243                                          node_whitelist)
11244              for op in self.op.instances]
11245
11246     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11247     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11248
11249     ial.Run(self.op.iallocator)
11250
11251     if not ial.success:
11252       raise errors.OpPrereqError("Can't compute nodes using"
11253                                  " iallocator '%s': %s" %
11254                                  (self.op.iallocator, ial.info),
11255                                  errors.ECODE_NORES)
11256
11257     self.ia_result = ial.result
11258
11259     if self.op.dry_run:
11260       self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11261         constants.JOB_IDS_KEY: [],
11262         })
11263
11264   def _ConstructPartialResult(self):
11265     """Contructs the partial result.
11266
11267     """
11268     (allocatable, failed) = self.ia_result
11269     return {
11270       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11271         map(compat.fst, allocatable),
11272       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11273       }
11274
11275   def Exec(self, feedback_fn):
11276     """Executes the opcode.
11277
11278     """
11279     op2inst = dict((op.instance_name, op) for op in self.op.instances)
11280     (allocatable, failed) = self.ia_result
11281
11282     jobs = []
11283     for (name, nodes) in allocatable:
11284       op = op2inst.pop(name)
11285
11286       if len(nodes) > 1:
11287         (op.pnode, op.snode) = nodes
11288       else:
11289         (op.pnode,) = nodes
11290
11291       jobs.append([op])
11292
11293     missing = set(op2inst.keys()) - set(failed)
11294     assert not missing, \
11295       "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11296
11297     return ResultWithJobs(jobs, **self._ConstructPartialResult())
11298
11299
11300 def _CheckRADOSFreeSpace():
11301   """Compute disk size requirements inside the RADOS cluster.
11302
11303   """
11304   # For the RADOS cluster we assume there is always enough space.
11305   pass
11306
11307
11308 class LUInstanceConsole(NoHooksLU):
11309   """Connect to an instance's console.
11310
11311   This is somewhat special in that it returns the command line that
11312   you need to run on the master node in order to connect to the
11313   console.
11314
11315   """
11316   REQ_BGL = False
11317
11318   def ExpandNames(self):
11319     self.share_locks = _ShareAll()
11320     self._ExpandAndLockInstance()
11321
11322   def CheckPrereq(self):
11323     """Check prerequisites.
11324
11325     This checks that the instance is in the cluster.
11326
11327     """
11328     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11329     assert self.instance is not None, \
11330       "Cannot retrieve locked instance %s" % self.op.instance_name
11331     _CheckNodeOnline(self, self.instance.primary_node)
11332
11333   def Exec(self, feedback_fn):
11334     """Connect to the console of an instance
11335
11336     """
11337     instance = self.instance
11338     node = instance.primary_node
11339
11340     node_insts = self.rpc.call_instance_list([node],
11341                                              [instance.hypervisor])[node]
11342     node_insts.Raise("Can't get node information from %s" % node)
11343
11344     if instance.name not in node_insts.payload:
11345       if instance.admin_state == constants.ADMINST_UP:
11346         state = constants.INSTST_ERRORDOWN
11347       elif instance.admin_state == constants.ADMINST_DOWN:
11348         state = constants.INSTST_ADMINDOWN
11349       else:
11350         state = constants.INSTST_ADMINOFFLINE
11351       raise errors.OpExecError("Instance %s is not running (state %s)" %
11352                                (instance.name, state))
11353
11354     logging.debug("Connecting to console of %s on %s", instance.name, node)
11355
11356     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11357
11358
11359 def _GetInstanceConsole(cluster, instance):
11360   """Returns console information for an instance.
11361
11362   @type cluster: L{objects.Cluster}
11363   @type instance: L{objects.Instance}
11364   @rtype: dict
11365
11366   """
11367   hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11368   # beparams and hvparams are passed separately, to avoid editing the
11369   # instance and then saving the defaults in the instance itself.
11370   hvparams = cluster.FillHV(instance)
11371   beparams = cluster.FillBE(instance)
11372   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11373
11374   assert console.instance == instance.name
11375   assert console.Validate()
11376
11377   return console.ToDict()
11378
11379
11380 class LUInstanceReplaceDisks(LogicalUnit):
11381   """Replace the disks of an instance.
11382
11383   """
11384   HPATH = "mirrors-replace"
11385   HTYPE = constants.HTYPE_INSTANCE
11386   REQ_BGL = False
11387
11388   def CheckArguments(self):
11389     """Check arguments.
11390
11391     """
11392     remote_node = self.op.remote_node
11393     ialloc = self.op.iallocator
11394     if self.op.mode == constants.REPLACE_DISK_CHG:
11395       if remote_node is None and ialloc is None:
11396         raise errors.OpPrereqError("When changing the secondary either an"
11397                                    " iallocator script must be used or the"
11398                                    " new node given", errors.ECODE_INVAL)
11399       else:
11400         _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11401
11402     elif remote_node is not None or ialloc is not None:
11403       # Not replacing the secondary
11404       raise errors.OpPrereqError("The iallocator and new node options can"
11405                                  " only be used when changing the"
11406                                  " secondary node", errors.ECODE_INVAL)
11407
11408   def ExpandNames(self):
11409     self._ExpandAndLockInstance()
11410
11411     assert locking.LEVEL_NODE not in self.needed_locks
11412     assert locking.LEVEL_NODE_RES not in self.needed_locks
11413     assert locking.LEVEL_NODEGROUP not in self.needed_locks
11414
11415     assert self.op.iallocator is None or self.op.remote_node is None, \
11416       "Conflicting options"
11417
11418     if self.op.remote_node is not None:
11419       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11420
11421       # Warning: do not remove the locking of the new secondary here
11422       # unless DRBD8.AddChildren is changed to work in parallel;
11423       # currently it doesn't since parallel invocations of
11424       # FindUnusedMinor will conflict
11425       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11426       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11427     else:
11428       self.needed_locks[locking.LEVEL_NODE] = []
11429       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11430
11431       if self.op.iallocator is not None:
11432         # iallocator will select a new node in the same group
11433         self.needed_locks[locking.LEVEL_NODEGROUP] = []
11434         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11435
11436     self.needed_locks[locking.LEVEL_NODE_RES] = []
11437
11438     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11439                                    self.op.iallocator, self.op.remote_node,
11440                                    self.op.disks, self.op.early_release,
11441                                    self.op.ignore_ipolicy)
11442
11443     self.tasklets = [self.replacer]
11444
11445   def DeclareLocks(self, level):
11446     if level == locking.LEVEL_NODEGROUP:
11447       assert self.op.remote_node is None
11448       assert self.op.iallocator is not None
11449       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11450
11451       self.share_locks[locking.LEVEL_NODEGROUP] = 1
11452       # Lock all groups used by instance optimistically; this requires going
11453       # via the node before it's locked, requiring verification later on
11454       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11455         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11456
11457     elif level == locking.LEVEL_NODE:
11458       if self.op.iallocator is not None:
11459         assert self.op.remote_node is None
11460         assert not self.needed_locks[locking.LEVEL_NODE]
11461         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11462
11463         # Lock member nodes of all locked groups
11464         self.needed_locks[locking.LEVEL_NODE] = \
11465             [node_name
11466              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11467              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11468       else:
11469         assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11470
11471         self._LockInstancesNodes()
11472
11473     elif level == locking.LEVEL_NODE_RES:
11474       # Reuse node locks
11475       self.needed_locks[locking.LEVEL_NODE_RES] = \
11476         self.needed_locks[locking.LEVEL_NODE]
11477
11478   def BuildHooksEnv(self):
11479     """Build hooks env.
11480
11481     This runs on the master, the primary and all the secondaries.
11482
11483     """
11484     instance = self.replacer.instance
11485     env = {
11486       "MODE": self.op.mode,
11487       "NEW_SECONDARY": self.op.remote_node,
11488       "OLD_SECONDARY": instance.secondary_nodes[0],
11489       }
11490     env.update(_BuildInstanceHookEnvByObject(self, instance))
11491     return env
11492
11493   def BuildHooksNodes(self):
11494     """Build hooks nodes.
11495
11496     """
11497     instance = self.replacer.instance
11498     nl = [
11499       self.cfg.GetMasterNode(),
11500       instance.primary_node,
11501       ]
11502     if self.op.remote_node is not None:
11503       nl.append(self.op.remote_node)
11504     return nl, nl
11505
11506   def CheckPrereq(self):
11507     """Check prerequisites.
11508
11509     """
11510     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11511             self.op.iallocator is None)
11512
11513     # Verify if node group locks are still correct
11514     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11515     if owned_groups:
11516       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11517
11518     return LogicalUnit.CheckPrereq(self)
11519
11520
11521 class TLReplaceDisks(Tasklet):
11522   """Replaces disks for an instance.
11523
11524   Note: Locking is not within the scope of this class.
11525
11526   """
11527   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11528                disks, early_release, ignore_ipolicy):
11529     """Initializes this class.
11530
11531     """
11532     Tasklet.__init__(self, lu)
11533
11534     # Parameters
11535     self.instance_name = instance_name
11536     self.mode = mode
11537     self.iallocator_name = iallocator_name
11538     self.remote_node = remote_node
11539     self.disks = disks
11540     self.early_release = early_release
11541     self.ignore_ipolicy = ignore_ipolicy
11542
11543     # Runtime data
11544     self.instance = None
11545     self.new_node = None
11546     self.target_node = None
11547     self.other_node = None
11548     self.remote_node_info = None
11549     self.node_secondary_ip = None
11550
11551   @staticmethod
11552   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11553     """Compute a new secondary node using an IAllocator.
11554
11555     """
11556     req = iallocator.IAReqRelocate(name=instance_name,
11557                                    relocate_from=list(relocate_from))
11558     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11559
11560     ial.Run(iallocator_name)
11561
11562     if not ial.success:
11563       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11564                                  " %s" % (iallocator_name, ial.info),
11565                                  errors.ECODE_NORES)
11566
11567     remote_node_name = ial.result[0]
11568
11569     lu.LogInfo("Selected new secondary for instance '%s': %s",
11570                instance_name, remote_node_name)
11571
11572     return remote_node_name
11573
11574   def _FindFaultyDisks(self, node_name):
11575     """Wrapper for L{_FindFaultyInstanceDisks}.
11576
11577     """
11578     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11579                                     node_name, True)
11580
11581   def _CheckDisksActivated(self, instance):
11582     """Checks if the instance disks are activated.
11583
11584     @param instance: The instance to check disks
11585     @return: True if they are activated, False otherwise
11586
11587     """
11588     nodes = instance.all_nodes
11589
11590     for idx, dev in enumerate(instance.disks):
11591       for node in nodes:
11592         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11593         self.cfg.SetDiskID(dev, node)
11594
11595         result = _BlockdevFind(self, node, dev, instance)
11596
11597         if result.offline:
11598           continue
11599         elif result.fail_msg or not result.payload:
11600           return False
11601
11602     return True
11603
11604   def CheckPrereq(self):
11605     """Check prerequisites.
11606
11607     This checks that the instance is in the cluster.
11608
11609     """
11610     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11611     assert instance is not None, \
11612       "Cannot retrieve locked instance %s" % self.instance_name
11613
11614     if instance.disk_template != constants.DT_DRBD8:
11615       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11616                                  " instances", errors.ECODE_INVAL)
11617
11618     if len(instance.secondary_nodes) != 1:
11619       raise errors.OpPrereqError("The instance has a strange layout,"
11620                                  " expected one secondary but found %d" %
11621                                  len(instance.secondary_nodes),
11622                                  errors.ECODE_FAULT)
11623
11624     instance = self.instance
11625     secondary_node = instance.secondary_nodes[0]
11626
11627     if self.iallocator_name is None:
11628       remote_node = self.remote_node
11629     else:
11630       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11631                                        instance.name, instance.secondary_nodes)
11632
11633     if remote_node is None:
11634       self.remote_node_info = None
11635     else:
11636       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11637              "Remote node '%s' is not locked" % remote_node
11638
11639       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11640       assert self.remote_node_info is not None, \
11641         "Cannot retrieve locked node %s" % remote_node
11642
11643     if remote_node == self.instance.primary_node:
11644       raise errors.OpPrereqError("The specified node is the primary node of"
11645                                  " the instance", errors.ECODE_INVAL)
11646
11647     if remote_node == secondary_node:
11648       raise errors.OpPrereqError("The specified node is already the"
11649                                  " secondary node of the instance",
11650                                  errors.ECODE_INVAL)
11651
11652     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11653                                     constants.REPLACE_DISK_CHG):
11654       raise errors.OpPrereqError("Cannot specify disks to be replaced",
11655                                  errors.ECODE_INVAL)
11656
11657     if self.mode == constants.REPLACE_DISK_AUTO:
11658       if not self._CheckDisksActivated(instance):
11659         raise errors.OpPrereqError("Please run activate-disks on instance %s"
11660                                    " first" % self.instance_name,
11661                                    errors.ECODE_STATE)
11662       faulty_primary = self._FindFaultyDisks(instance.primary_node)
11663       faulty_secondary = self._FindFaultyDisks(secondary_node)
11664
11665       if faulty_primary and faulty_secondary:
11666         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11667                                    " one node and can not be repaired"
11668                                    " automatically" % self.instance_name,
11669                                    errors.ECODE_STATE)
11670
11671       if faulty_primary:
11672         self.disks = faulty_primary
11673         self.target_node = instance.primary_node
11674         self.other_node = secondary_node
11675         check_nodes = [self.target_node, self.other_node]
11676       elif faulty_secondary:
11677         self.disks = faulty_secondary
11678         self.target_node = secondary_node
11679         self.other_node = instance.primary_node
11680         check_nodes = [self.target_node, self.other_node]
11681       else:
11682         self.disks = []
11683         check_nodes = []
11684
11685     else:
11686       # Non-automatic modes
11687       if self.mode == constants.REPLACE_DISK_PRI:
11688         self.target_node = instance.primary_node
11689         self.other_node = secondary_node
11690         check_nodes = [self.target_node, self.other_node]
11691
11692       elif self.mode == constants.REPLACE_DISK_SEC:
11693         self.target_node = secondary_node
11694         self.other_node = instance.primary_node
11695         check_nodes = [self.target_node, self.other_node]
11696
11697       elif self.mode == constants.REPLACE_DISK_CHG:
11698         self.new_node = remote_node
11699         self.other_node = instance.primary_node
11700         self.target_node = secondary_node
11701         check_nodes = [self.new_node, self.other_node]
11702
11703         _CheckNodeNotDrained(self.lu, remote_node)
11704         _CheckNodeVmCapable(self.lu, remote_node)
11705
11706         old_node_info = self.cfg.GetNodeInfo(secondary_node)
11707         assert old_node_info is not None
11708         if old_node_info.offline and not self.early_release:
11709           # doesn't make sense to delay the release
11710           self.early_release = True
11711           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11712                           " early-release mode", secondary_node)
11713
11714       else:
11715         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11716                                      self.mode)
11717
11718       # If not specified all disks should be replaced
11719       if not self.disks:
11720         self.disks = range(len(self.instance.disks))
11721
11722     # TODO: This is ugly, but right now we can't distinguish between internal
11723     # submitted opcode and external one. We should fix that.
11724     if self.remote_node_info:
11725       # We change the node, lets verify it still meets instance policy
11726       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11727       cluster = self.cfg.GetClusterInfo()
11728       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11729                                                               new_group_info)
11730       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11731                               ignore=self.ignore_ipolicy)
11732
11733     for node in check_nodes:
11734       _CheckNodeOnline(self.lu, node)
11735
11736     touched_nodes = frozenset(node_name for node_name in [self.new_node,
11737                                                           self.other_node,
11738                                                           self.target_node]
11739                               if node_name is not None)
11740
11741     # Release unneeded node and node resource locks
11742     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11743     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11744     _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11745
11746     # Release any owned node group
11747     _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11748
11749     # Check whether disks are valid
11750     for disk_idx in self.disks:
11751       instance.FindDisk(disk_idx)
11752
11753     # Get secondary node IP addresses
11754     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11755                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
11756
11757   def Exec(self, feedback_fn):
11758     """Execute disk replacement.
11759
11760     This dispatches the disk replacement to the appropriate handler.
11761
11762     """
11763     if __debug__:
11764       # Verify owned locks before starting operation
11765       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11766       assert set(owned_nodes) == set(self.node_secondary_ip), \
11767           ("Incorrect node locks, owning %s, expected %s" %
11768            (owned_nodes, self.node_secondary_ip.keys()))
11769       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11770               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11771       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11772
11773       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11774       assert list(owned_instances) == [self.instance_name], \
11775           "Instance '%s' not locked" % self.instance_name
11776
11777       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11778           "Should not own any node group lock at this point"
11779
11780     if not self.disks:
11781       feedback_fn("No disks need replacement for instance '%s'" %
11782                   self.instance.name)
11783       return
11784
11785     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11786                 (utils.CommaJoin(self.disks), self.instance.name))
11787     feedback_fn("Current primary node: %s" % self.instance.primary_node)
11788     feedback_fn("Current seconary node: %s" %
11789                 utils.CommaJoin(self.instance.secondary_nodes))
11790
11791     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11792
11793     # Activate the instance disks if we're replacing them on a down instance
11794     if activate_disks:
11795       _StartInstanceDisks(self.lu, self.instance, True)
11796
11797     try:
11798       # Should we replace the secondary node?
11799       if self.new_node is not None:
11800         fn = self._ExecDrbd8Secondary
11801       else:
11802         fn = self._ExecDrbd8DiskOnly
11803
11804       result = fn(feedback_fn)
11805     finally:
11806       # Deactivate the instance disks if we're replacing them on a
11807       # down instance
11808       if activate_disks:
11809         _SafeShutdownInstanceDisks(self.lu, self.instance)
11810
11811     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11812
11813     if __debug__:
11814       # Verify owned locks
11815       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11816       nodes = frozenset(self.node_secondary_ip)
11817       assert ((self.early_release and not owned_nodes) or
11818               (not self.early_release and not (set(owned_nodes) - nodes))), \
11819         ("Not owning the correct locks, early_release=%s, owned=%r,"
11820          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11821
11822     return result
11823
11824   def _CheckVolumeGroup(self, nodes):
11825     self.lu.LogInfo("Checking volume groups")
11826
11827     vgname = self.cfg.GetVGName()
11828
11829     # Make sure volume group exists on all involved nodes
11830     results = self.rpc.call_vg_list(nodes)
11831     if not results:
11832       raise errors.OpExecError("Can't list volume groups on the nodes")
11833
11834     for node in nodes:
11835       res = results[node]
11836       res.Raise("Error checking node %s" % node)
11837       if vgname not in res.payload:
11838         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11839                                  (vgname, node))
11840
11841   def _CheckDisksExistence(self, nodes):
11842     # Check disk existence
11843     for idx, dev in enumerate(self.instance.disks):
11844       if idx not in self.disks:
11845         continue
11846
11847       for node in nodes:
11848         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11849         self.cfg.SetDiskID(dev, node)
11850
11851         result = _BlockdevFind(self, node, dev, self.instance)
11852
11853         msg = result.fail_msg
11854         if msg or not result.payload:
11855           if not msg:
11856             msg = "disk not found"
11857           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11858                                    (idx, node, msg))
11859
11860   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11861     for idx, dev in enumerate(self.instance.disks):
11862       if idx not in self.disks:
11863         continue
11864
11865       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11866                       (idx, node_name))
11867
11868       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11869                                    on_primary, ldisk=ldisk):
11870         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11871                                  " replace disks for instance %s" %
11872                                  (node_name, self.instance.name))
11873
11874   def _CreateNewStorage(self, node_name):
11875     """Create new storage on the primary or secondary node.
11876
11877     This is only used for same-node replaces, not for changing the
11878     secondary node, hence we don't want to modify the existing disk.
11879
11880     """
11881     iv_names = {}
11882
11883     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11884     for idx, dev in enumerate(disks):
11885       if idx not in self.disks:
11886         continue
11887
11888       self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11889
11890       self.cfg.SetDiskID(dev, node_name)
11891
11892       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11893       names = _GenerateUniqueNames(self.lu, lv_names)
11894
11895       (data_disk, meta_disk) = dev.children
11896       vg_data = data_disk.logical_id[0]
11897       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11898                              logical_id=(vg_data, names[0]),
11899                              params=data_disk.params)
11900       vg_meta = meta_disk.logical_id[0]
11901       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11902                              size=constants.DRBD_META_SIZE,
11903                              logical_id=(vg_meta, names[1]),
11904                              params=meta_disk.params)
11905
11906       new_lvs = [lv_data, lv_meta]
11907       old_lvs = [child.Copy() for child in dev.children]
11908       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11909       excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11910
11911       # we pass force_create=True to force the LVM creation
11912       for new_lv in new_lvs:
11913         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11914                              _GetInstanceInfoText(self.instance), False,
11915                              excl_stor)
11916
11917     return iv_names
11918
11919   def _CheckDevices(self, node_name, iv_names):
11920     for name, (dev, _, _) in iv_names.iteritems():
11921       self.cfg.SetDiskID(dev, node_name)
11922
11923       result = _BlockdevFind(self, node_name, dev, self.instance)
11924
11925       msg = result.fail_msg
11926       if msg or not result.payload:
11927         if not msg:
11928           msg = "disk not found"
11929         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11930                                  (name, msg))
11931
11932       if result.payload.is_degraded:
11933         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11934
11935   def _RemoveOldStorage(self, node_name, iv_names):
11936     for name, (_, old_lvs, _) in iv_names.iteritems():
11937       self.lu.LogInfo("Remove logical volumes for %s", name)
11938
11939       for lv in old_lvs:
11940         self.cfg.SetDiskID(lv, node_name)
11941
11942         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11943         if msg:
11944           self.lu.LogWarning("Can't remove old LV: %s", msg,
11945                              hint="remove unused LVs manually")
11946
11947   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11948     """Replace a disk on the primary or secondary for DRBD 8.
11949
11950     The algorithm for replace is quite complicated:
11951
11952       1. for each disk to be replaced:
11953
11954         1. create new LVs on the target node with unique names
11955         1. detach old LVs from the drbd device
11956         1. rename old LVs to name_replaced.<time_t>
11957         1. rename new LVs to old LVs
11958         1. attach the new LVs (with the old names now) to the drbd device
11959
11960       1. wait for sync across all devices
11961
11962       1. for each modified disk:
11963
11964         1. remove old LVs (which have the name name_replaces.<time_t>)
11965
11966     Failures are not very well handled.
11967
11968     """
11969     steps_total = 6
11970
11971     # Step: check device activation
11972     self.lu.LogStep(1, steps_total, "Check device existence")
11973     self._CheckDisksExistence([self.other_node, self.target_node])
11974     self._CheckVolumeGroup([self.target_node, self.other_node])
11975
11976     # Step: check other node consistency
11977     self.lu.LogStep(2, steps_total, "Check peer consistency")
11978     self._CheckDisksConsistency(self.other_node,
11979                                 self.other_node == self.instance.primary_node,
11980                                 False)
11981
11982     # Step: create new storage
11983     self.lu.LogStep(3, steps_total, "Allocate new storage")
11984     iv_names = self._CreateNewStorage(self.target_node)
11985
11986     # Step: for each lv, detach+rename*2+attach
11987     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11988     for dev, old_lvs, new_lvs in iv_names.itervalues():
11989       self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11990
11991       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11992                                                      old_lvs)
11993       result.Raise("Can't detach drbd from local storage on node"
11994                    " %s for device %s" % (self.target_node, dev.iv_name))
11995       #dev.children = []
11996       #cfg.Update(instance)
11997
11998       # ok, we created the new LVs, so now we know we have the needed
11999       # storage; as such, we proceed on the target node to rename
12000       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12001       # using the assumption that logical_id == physical_id (which in
12002       # turn is the unique_id on that node)
12003
12004       # FIXME(iustin): use a better name for the replaced LVs
12005       temp_suffix = int(time.time())
12006       ren_fn = lambda d, suff: (d.physical_id[0],
12007                                 d.physical_id[1] + "_replaced-%s" % suff)
12008
12009       # Build the rename list based on what LVs exist on the node
12010       rename_old_to_new = []
12011       for to_ren in old_lvs:
12012         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12013         if not result.fail_msg and result.payload:
12014           # device exists
12015           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12016
12017       self.lu.LogInfo("Renaming the old LVs on the target node")
12018       result = self.rpc.call_blockdev_rename(self.target_node,
12019                                              rename_old_to_new)
12020       result.Raise("Can't rename old LVs on node %s" % self.target_node)
12021
12022       # Now we rename the new LVs to the old LVs
12023       self.lu.LogInfo("Renaming the new LVs on the target node")
12024       rename_new_to_old = [(new, old.physical_id)
12025                            for old, new in zip(old_lvs, new_lvs)]
12026       result = self.rpc.call_blockdev_rename(self.target_node,
12027                                              rename_new_to_old)
12028       result.Raise("Can't rename new LVs on node %s" % self.target_node)
12029
12030       # Intermediate steps of in memory modifications
12031       for old, new in zip(old_lvs, new_lvs):
12032         new.logical_id = old.logical_id
12033         self.cfg.SetDiskID(new, self.target_node)
12034
12035       # We need to modify old_lvs so that removal later removes the
12036       # right LVs, not the newly added ones; note that old_lvs is a
12037       # copy here
12038       for disk in old_lvs:
12039         disk.logical_id = ren_fn(disk, temp_suffix)
12040         self.cfg.SetDiskID(disk, self.target_node)
12041
12042       # Now that the new lvs have the old name, we can add them to the device
12043       self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12044       result = self.rpc.call_blockdev_addchildren(self.target_node,
12045                                                   (dev, self.instance), new_lvs)
12046       msg = result.fail_msg
12047       if msg:
12048         for new_lv in new_lvs:
12049           msg2 = self.rpc.call_blockdev_remove(self.target_node,
12050                                                new_lv).fail_msg
12051           if msg2:
12052             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12053                                hint=("cleanup manually the unused logical"
12054                                      "volumes"))
12055         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12056
12057     cstep = itertools.count(5)
12058
12059     if self.early_release:
12060       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12061       self._RemoveOldStorage(self.target_node, iv_names)
12062       # TODO: Check if releasing locks early still makes sense
12063       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12064     else:
12065       # Release all resource locks except those used by the instance
12066       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12067                     keep=self.node_secondary_ip.keys())
12068
12069     # Release all node locks while waiting for sync
12070     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12071
12072     # TODO: Can the instance lock be downgraded here? Take the optional disk
12073     # shutdown in the caller into consideration.
12074
12075     # Wait for sync
12076     # This can fail as the old devices are degraded and _WaitForSync
12077     # does a combined result over all disks, so we don't check its return value
12078     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12079     _WaitForSync(self.lu, self.instance)
12080
12081     # Check all devices manually
12082     self._CheckDevices(self.instance.primary_node, iv_names)
12083
12084     # Step: remove old storage
12085     if not self.early_release:
12086       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12087       self._RemoveOldStorage(self.target_node, iv_names)
12088
12089   def _ExecDrbd8Secondary(self, feedback_fn):
12090     """Replace the secondary node for DRBD 8.
12091
12092     The algorithm for replace is quite complicated:
12093       - for all disks of the instance:
12094         - create new LVs on the new node with same names
12095         - shutdown the drbd device on the old secondary
12096         - disconnect the drbd network on the primary
12097         - create the drbd device on the new secondary
12098         - network attach the drbd on the primary, using an artifice:
12099           the drbd code for Attach() will connect to the network if it
12100           finds a device which is connected to the good local disks but
12101           not network enabled
12102       - wait for sync across all devices
12103       - remove all disks from the old secondary
12104
12105     Failures are not very well handled.
12106
12107     """
12108     steps_total = 6
12109
12110     pnode = self.instance.primary_node
12111
12112     # Step: check device activation
12113     self.lu.LogStep(1, steps_total, "Check device existence")
12114     self._CheckDisksExistence([self.instance.primary_node])
12115     self._CheckVolumeGroup([self.instance.primary_node])
12116
12117     # Step: check other node consistency
12118     self.lu.LogStep(2, steps_total, "Check peer consistency")
12119     self._CheckDisksConsistency(self.instance.primary_node, True, True)
12120
12121     # Step: create new storage
12122     self.lu.LogStep(3, steps_total, "Allocate new storage")
12123     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12124     excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12125     for idx, dev in enumerate(disks):
12126       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12127                       (self.new_node, idx))
12128       # we pass force_create=True to force LVM creation
12129       for new_lv in dev.children:
12130         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12131                              True, _GetInstanceInfoText(self.instance), False,
12132                              excl_stor)
12133
12134     # Step 4: dbrd minors and drbd setups changes
12135     # after this, we must manually remove the drbd minors on both the
12136     # error and the success paths
12137     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12138     minors = self.cfg.AllocateDRBDMinor([self.new_node
12139                                          for dev in self.instance.disks],
12140                                         self.instance.name)
12141     logging.debug("Allocated minors %r", minors)
12142
12143     iv_names = {}
12144     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12145       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12146                       (self.new_node, idx))
12147       # create new devices on new_node; note that we create two IDs:
12148       # one without port, so the drbd will be activated without
12149       # networking information on the new node at this stage, and one
12150       # with network, for the latter activation in step 4
12151       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12152       if self.instance.primary_node == o_node1:
12153         p_minor = o_minor1
12154       else:
12155         assert self.instance.primary_node == o_node2, "Three-node instance?"
12156         p_minor = o_minor2
12157
12158       new_alone_id = (self.instance.primary_node, self.new_node, None,
12159                       p_minor, new_minor, o_secret)
12160       new_net_id = (self.instance.primary_node, self.new_node, o_port,
12161                     p_minor, new_minor, o_secret)
12162
12163       iv_names[idx] = (dev, dev.children, new_net_id)
12164       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12165                     new_net_id)
12166       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12167                               logical_id=new_alone_id,
12168                               children=dev.children,
12169                               size=dev.size,
12170                               params={})
12171       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12172                                              self.cfg)
12173       try:
12174         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12175                               anno_new_drbd,
12176                               _GetInstanceInfoText(self.instance), False,
12177                               excl_stor)
12178       except errors.GenericError:
12179         self.cfg.ReleaseDRBDMinors(self.instance.name)
12180         raise
12181
12182     # We have new devices, shutdown the drbd on the old secondary
12183     for idx, dev in enumerate(self.instance.disks):
12184       self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12185       self.cfg.SetDiskID(dev, self.target_node)
12186       msg = self.rpc.call_blockdev_shutdown(self.target_node,
12187                                             (dev, self.instance)).fail_msg
12188       if msg:
12189         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12190                            "node: %s" % (idx, msg),
12191                            hint=("Please cleanup this device manually as"
12192                                  " soon as possible"))
12193
12194     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12195     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12196                                                self.instance.disks)[pnode]
12197
12198     msg = result.fail_msg
12199     if msg:
12200       # detaches didn't succeed (unlikely)
12201       self.cfg.ReleaseDRBDMinors(self.instance.name)
12202       raise errors.OpExecError("Can't detach the disks from the network on"
12203                                " old node: %s" % (msg,))
12204
12205     # if we managed to detach at least one, we update all the disks of
12206     # the instance to point to the new secondary
12207     self.lu.LogInfo("Updating instance configuration")
12208     for dev, _, new_logical_id in iv_names.itervalues():
12209       dev.logical_id = new_logical_id
12210       self.cfg.SetDiskID(dev, self.instance.primary_node)
12211
12212     self.cfg.Update(self.instance, feedback_fn)
12213
12214     # Release all node locks (the configuration has been updated)
12215     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12216
12217     # and now perform the drbd attach
12218     self.lu.LogInfo("Attaching primary drbds to new secondary"
12219                     " (standalone => connected)")
12220     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12221                                             self.new_node],
12222                                            self.node_secondary_ip,
12223                                            (self.instance.disks, self.instance),
12224                                            self.instance.name,
12225                                            False)
12226     for to_node, to_result in result.items():
12227       msg = to_result.fail_msg
12228       if msg:
12229         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12230                            to_node, msg,
12231                            hint=("please do a gnt-instance info to see the"
12232                                  " status of disks"))
12233
12234     cstep = itertools.count(5)
12235
12236     if self.early_release:
12237       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12238       self._RemoveOldStorage(self.target_node, iv_names)
12239       # TODO: Check if releasing locks early still makes sense
12240       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12241     else:
12242       # Release all resource locks except those used by the instance
12243       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12244                     keep=self.node_secondary_ip.keys())
12245
12246     # TODO: Can the instance lock be downgraded here? Take the optional disk
12247     # shutdown in the caller into consideration.
12248
12249     # Wait for sync
12250     # This can fail as the old devices are degraded and _WaitForSync
12251     # does a combined result over all disks, so we don't check its return value
12252     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12253     _WaitForSync(self.lu, self.instance)
12254
12255     # Check all devices manually
12256     self._CheckDevices(self.instance.primary_node, iv_names)
12257
12258     # Step: remove old storage
12259     if not self.early_release:
12260       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12261       self._RemoveOldStorage(self.target_node, iv_names)
12262
12263
12264 class LURepairNodeStorage(NoHooksLU):
12265   """Repairs the volume group on a node.
12266
12267   """
12268   REQ_BGL = False
12269
12270   def CheckArguments(self):
12271     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12272
12273     storage_type = self.op.storage_type
12274
12275     if (constants.SO_FIX_CONSISTENCY not in
12276         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12277       raise errors.OpPrereqError("Storage units of type '%s' can not be"
12278                                  " repaired" % storage_type,
12279                                  errors.ECODE_INVAL)
12280
12281   def ExpandNames(self):
12282     self.needed_locks = {
12283       locking.LEVEL_NODE: [self.op.node_name],
12284       }
12285
12286   def _CheckFaultyDisks(self, instance, node_name):
12287     """Ensure faulty disks abort the opcode or at least warn."""
12288     try:
12289       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12290                                   node_name, True):
12291         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12292                                    " node '%s'" % (instance.name, node_name),
12293                                    errors.ECODE_STATE)
12294     except errors.OpPrereqError, err:
12295       if self.op.ignore_consistency:
12296         self.LogWarning(str(err.args[0]))
12297       else:
12298         raise
12299
12300   def CheckPrereq(self):
12301     """Check prerequisites.
12302
12303     """
12304     # Check whether any instance on this node has faulty disks
12305     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12306       if inst.admin_state != constants.ADMINST_UP:
12307         continue
12308       check_nodes = set(inst.all_nodes)
12309       check_nodes.discard(self.op.node_name)
12310       for inst_node_name in check_nodes:
12311         self._CheckFaultyDisks(inst, inst_node_name)
12312
12313   def Exec(self, feedback_fn):
12314     feedback_fn("Repairing storage unit '%s' on %s ..." %
12315                 (self.op.name, self.op.node_name))
12316
12317     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12318     result = self.rpc.call_storage_execute(self.op.node_name,
12319                                            self.op.storage_type, st_args,
12320                                            self.op.name,
12321                                            constants.SO_FIX_CONSISTENCY)
12322     result.Raise("Failed to repair storage unit '%s' on %s" %
12323                  (self.op.name, self.op.node_name))
12324
12325
12326 class LUNodeEvacuate(NoHooksLU):
12327   """Evacuates instances off a list of nodes.
12328
12329   """
12330   REQ_BGL = False
12331
12332   _MODE2IALLOCATOR = {
12333     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12334     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12335     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12336     }
12337   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12338   assert (frozenset(_MODE2IALLOCATOR.values()) ==
12339           constants.IALLOCATOR_NEVAC_MODES)
12340
12341   def CheckArguments(self):
12342     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12343
12344   def ExpandNames(self):
12345     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12346
12347     if self.op.remote_node is not None:
12348       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12349       assert self.op.remote_node
12350
12351       if self.op.remote_node == self.op.node_name:
12352         raise errors.OpPrereqError("Can not use evacuated node as a new"
12353                                    " secondary node", errors.ECODE_INVAL)
12354
12355       if self.op.mode != constants.NODE_EVAC_SEC:
12356         raise errors.OpPrereqError("Without the use of an iallocator only"
12357                                    " secondary instances can be evacuated",
12358                                    errors.ECODE_INVAL)
12359
12360     # Declare locks
12361     self.share_locks = _ShareAll()
12362     self.needed_locks = {
12363       locking.LEVEL_INSTANCE: [],
12364       locking.LEVEL_NODEGROUP: [],
12365       locking.LEVEL_NODE: [],
12366       }
12367
12368     # Determine nodes (via group) optimistically, needs verification once locks
12369     # have been acquired
12370     self.lock_nodes = self._DetermineNodes()
12371
12372   def _DetermineNodes(self):
12373     """Gets the list of nodes to operate on.
12374
12375     """
12376     if self.op.remote_node is None:
12377       # Iallocator will choose any node(s) in the same group
12378       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12379     else:
12380       group_nodes = frozenset([self.op.remote_node])
12381
12382     # Determine nodes to be locked
12383     return set([self.op.node_name]) | group_nodes
12384
12385   def _DetermineInstances(self):
12386     """Builds list of instances to operate on.
12387
12388     """
12389     assert self.op.mode in constants.NODE_EVAC_MODES
12390
12391     if self.op.mode == constants.NODE_EVAC_PRI:
12392       # Primary instances only
12393       inst_fn = _GetNodePrimaryInstances
12394       assert self.op.remote_node is None, \
12395         "Evacuating primary instances requires iallocator"
12396     elif self.op.mode == constants.NODE_EVAC_SEC:
12397       # Secondary instances only
12398       inst_fn = _GetNodeSecondaryInstances
12399     else:
12400       # All instances
12401       assert self.op.mode == constants.NODE_EVAC_ALL
12402       inst_fn = _GetNodeInstances
12403       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12404       # per instance
12405       raise errors.OpPrereqError("Due to an issue with the iallocator"
12406                                  " interface it is not possible to evacuate"
12407                                  " all instances at once; specify explicitly"
12408                                  " whether to evacuate primary or secondary"
12409                                  " instances",
12410                                  errors.ECODE_INVAL)
12411
12412     return inst_fn(self.cfg, self.op.node_name)
12413
12414   def DeclareLocks(self, level):
12415     if level == locking.LEVEL_INSTANCE:
12416       # Lock instances optimistically, needs verification once node and group
12417       # locks have been acquired
12418       self.needed_locks[locking.LEVEL_INSTANCE] = \
12419         set(i.name for i in self._DetermineInstances())
12420
12421     elif level == locking.LEVEL_NODEGROUP:
12422       # Lock node groups for all potential target nodes optimistically, needs
12423       # verification once nodes have been acquired
12424       self.needed_locks[locking.LEVEL_NODEGROUP] = \
12425         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12426
12427     elif level == locking.LEVEL_NODE:
12428       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12429
12430   def CheckPrereq(self):
12431     # Verify locks
12432     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12433     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12434     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12435
12436     need_nodes = self._DetermineNodes()
12437
12438     if not owned_nodes.issuperset(need_nodes):
12439       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12440                                  " locks were acquired, current nodes are"
12441                                  " are '%s', used to be '%s'; retry the"
12442                                  " operation" %
12443                                  (self.op.node_name,
12444                                   utils.CommaJoin(need_nodes),
12445                                   utils.CommaJoin(owned_nodes)),
12446                                  errors.ECODE_STATE)
12447
12448     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12449     if owned_groups != wanted_groups:
12450       raise errors.OpExecError("Node groups changed since locks were acquired,"
12451                                " current groups are '%s', used to be '%s';"
12452                                " retry the operation" %
12453                                (utils.CommaJoin(wanted_groups),
12454                                 utils.CommaJoin(owned_groups)))
12455
12456     # Determine affected instances
12457     self.instances = self._DetermineInstances()
12458     self.instance_names = [i.name for i in self.instances]
12459
12460     if set(self.instance_names) != owned_instances:
12461       raise errors.OpExecError("Instances on node '%s' changed since locks"
12462                                " were acquired, current instances are '%s',"
12463                                " used to be '%s'; retry the operation" %
12464                                (self.op.node_name,
12465                                 utils.CommaJoin(self.instance_names),
12466                                 utils.CommaJoin(owned_instances)))
12467
12468     if self.instance_names:
12469       self.LogInfo("Evacuating instances from node '%s': %s",
12470                    self.op.node_name,
12471                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
12472     else:
12473       self.LogInfo("No instances to evacuate from node '%s'",
12474                    self.op.node_name)
12475
12476     if self.op.remote_node is not None:
12477       for i in self.instances:
12478         if i.primary_node == self.op.remote_node:
12479           raise errors.OpPrereqError("Node %s is the primary node of"
12480                                      " instance %s, cannot use it as"
12481                                      " secondary" %
12482                                      (self.op.remote_node, i.name),
12483                                      errors.ECODE_INVAL)
12484
12485   def Exec(self, feedback_fn):
12486     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12487
12488     if not self.instance_names:
12489       # No instances to evacuate
12490       jobs = []
12491
12492     elif self.op.iallocator is not None:
12493       # TODO: Implement relocation to other group
12494       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12495       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12496                                      instances=list(self.instance_names))
12497       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12498
12499       ial.Run(self.op.iallocator)
12500
12501       if not ial.success:
12502         raise errors.OpPrereqError("Can't compute node evacuation using"
12503                                    " iallocator '%s': %s" %
12504                                    (self.op.iallocator, ial.info),
12505                                    errors.ECODE_NORES)
12506
12507       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12508
12509     elif self.op.remote_node is not None:
12510       assert self.op.mode == constants.NODE_EVAC_SEC
12511       jobs = [
12512         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12513                                         remote_node=self.op.remote_node,
12514                                         disks=[],
12515                                         mode=constants.REPLACE_DISK_CHG,
12516                                         early_release=self.op.early_release)]
12517         for instance_name in self.instance_names]
12518
12519     else:
12520       raise errors.ProgrammerError("No iallocator or remote node")
12521
12522     return ResultWithJobs(jobs)
12523
12524
12525 def _SetOpEarlyRelease(early_release, op):
12526   """Sets C{early_release} flag on opcodes if available.
12527
12528   """
12529   try:
12530     op.early_release = early_release
12531   except AttributeError:
12532     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12533
12534   return op
12535
12536
12537 def _NodeEvacDest(use_nodes, group, nodes):
12538   """Returns group or nodes depending on caller's choice.
12539
12540   """
12541   if use_nodes:
12542     return utils.CommaJoin(nodes)
12543   else:
12544     return group
12545
12546
12547 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12548   """Unpacks the result of change-group and node-evacuate iallocator requests.
12549
12550   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12551   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12552
12553   @type lu: L{LogicalUnit}
12554   @param lu: Logical unit instance
12555   @type alloc_result: tuple/list
12556   @param alloc_result: Result from iallocator
12557   @type early_release: bool
12558   @param early_release: Whether to release locks early if possible
12559   @type use_nodes: bool
12560   @param use_nodes: Whether to display node names instead of groups
12561
12562   """
12563   (moved, failed, jobs) = alloc_result
12564
12565   if failed:
12566     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12567                                  for (name, reason) in failed)
12568     lu.LogWarning("Unable to evacuate instances %s", failreason)
12569     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12570
12571   if moved:
12572     lu.LogInfo("Instances to be moved: %s",
12573                utils.CommaJoin("%s (to %s)" %
12574                                (name, _NodeEvacDest(use_nodes, group, nodes))
12575                                for (name, group, nodes) in moved))
12576
12577   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12578               map(opcodes.OpCode.LoadOpCode, ops))
12579           for ops in jobs]
12580
12581
12582 def _DiskSizeInBytesToMebibytes(lu, size):
12583   """Converts a disk size in bytes to mebibytes.
12584
12585   Warns and rounds up if the size isn't an even multiple of 1 MiB.
12586
12587   """
12588   (mib, remainder) = divmod(size, 1024 * 1024)
12589
12590   if remainder != 0:
12591     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12592                   " to not overwrite existing data (%s bytes will not be"
12593                   " wiped)", (1024 * 1024) - remainder)
12594     mib += 1
12595
12596   return mib
12597
12598
12599 class LUInstanceGrowDisk(LogicalUnit):
12600   """Grow a disk of an instance.
12601
12602   """
12603   HPATH = "disk-grow"
12604   HTYPE = constants.HTYPE_INSTANCE
12605   REQ_BGL = False
12606
12607   def ExpandNames(self):
12608     self._ExpandAndLockInstance()
12609     self.needed_locks[locking.LEVEL_NODE] = []
12610     self.needed_locks[locking.LEVEL_NODE_RES] = []
12611     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12612     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12613
12614   def DeclareLocks(self, level):
12615     if level == locking.LEVEL_NODE:
12616       self._LockInstancesNodes()
12617     elif level == locking.LEVEL_NODE_RES:
12618       # Copy node locks
12619       self.needed_locks[locking.LEVEL_NODE_RES] = \
12620         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12621
12622   def BuildHooksEnv(self):
12623     """Build hooks env.
12624
12625     This runs on the master, the primary and all the secondaries.
12626
12627     """
12628     env = {
12629       "DISK": self.op.disk,
12630       "AMOUNT": self.op.amount,
12631       "ABSOLUTE": self.op.absolute,
12632       }
12633     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12634     return env
12635
12636   def BuildHooksNodes(self):
12637     """Build hooks nodes.
12638
12639     """
12640     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12641     return (nl, nl)
12642
12643   def CheckPrereq(self):
12644     """Check prerequisites.
12645
12646     This checks that the instance is in the cluster.
12647
12648     """
12649     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12650     assert instance is not None, \
12651       "Cannot retrieve locked instance %s" % self.op.instance_name
12652     nodenames = list(instance.all_nodes)
12653     for node in nodenames:
12654       _CheckNodeOnline(self, node)
12655
12656     self.instance = instance
12657
12658     if instance.disk_template not in constants.DTS_GROWABLE:
12659       raise errors.OpPrereqError("Instance's disk layout does not support"
12660                                  " growing", errors.ECODE_INVAL)
12661
12662     self.disk = instance.FindDisk(self.op.disk)
12663
12664     if self.op.absolute:
12665       self.target = self.op.amount
12666       self.delta = self.target - self.disk.size
12667       if self.delta < 0:
12668         raise errors.OpPrereqError("Requested size (%s) is smaller than "
12669                                    "current disk size (%s)" %
12670                                    (utils.FormatUnit(self.target, "h"),
12671                                     utils.FormatUnit(self.disk.size, "h")),
12672                                    errors.ECODE_STATE)
12673     else:
12674       self.delta = self.op.amount
12675       self.target = self.disk.size + self.delta
12676       if self.delta < 0:
12677         raise errors.OpPrereqError("Requested increment (%s) is negative" %
12678                                    utils.FormatUnit(self.delta, "h"),
12679                                    errors.ECODE_INVAL)
12680
12681     self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12682
12683   def _CheckDiskSpace(self, nodenames, req_vgspace):
12684     template = self.instance.disk_template
12685     if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12686       # TODO: check the free disk space for file, when that feature will be
12687       # supported
12688       nodes = map(self.cfg.GetNodeInfo, nodenames)
12689       es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12690                         nodes)
12691       if es_nodes:
12692         # With exclusive storage we need to something smarter than just looking
12693         # at free space; for now, let's simply abort the operation.
12694         raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12695                                    " is enabled", errors.ECODE_STATE)
12696       _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12697
12698   def Exec(self, feedback_fn):
12699     """Execute disk grow.
12700
12701     """
12702     instance = self.instance
12703     disk = self.disk
12704
12705     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12706     assert (self.owned_locks(locking.LEVEL_NODE) ==
12707             self.owned_locks(locking.LEVEL_NODE_RES))
12708
12709     wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12710
12711     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12712     if not disks_ok:
12713       raise errors.OpExecError("Cannot activate block device to grow")
12714
12715     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12716                 (self.op.disk, instance.name,
12717                  utils.FormatUnit(self.delta, "h"),
12718                  utils.FormatUnit(self.target, "h")))
12719
12720     # First run all grow ops in dry-run mode
12721     for node in instance.all_nodes:
12722       self.cfg.SetDiskID(disk, node)
12723       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12724                                            True, True)
12725       result.Raise("Dry-run grow request failed to node %s" % node)
12726
12727     if wipe_disks:
12728       # Get disk size from primary node for wiping
12729       result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12730       result.Raise("Failed to retrieve disk size from node '%s'" %
12731                    instance.primary_node)
12732
12733       (disk_size_in_bytes, ) = result.payload
12734
12735       if disk_size_in_bytes is None:
12736         raise errors.OpExecError("Failed to retrieve disk size from primary"
12737                                  " node '%s'" % instance.primary_node)
12738
12739       old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12740
12741       assert old_disk_size >= disk.size, \
12742         ("Retrieved disk size too small (got %s, should be at least %s)" %
12743          (old_disk_size, disk.size))
12744     else:
12745       old_disk_size = None
12746
12747     # We know that (as far as we can test) operations across different
12748     # nodes will succeed, time to run it for real on the backing storage
12749     for node in instance.all_nodes:
12750       self.cfg.SetDiskID(disk, node)
12751       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12752                                            False, True)
12753       result.Raise("Grow request failed to node %s" % node)
12754
12755     # And now execute it for logical storage, on the primary node
12756     node = instance.primary_node
12757     self.cfg.SetDiskID(disk, node)
12758     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12759                                          False, False)
12760     result.Raise("Grow request failed to node %s" % node)
12761
12762     disk.RecordGrow(self.delta)
12763     self.cfg.Update(instance, feedback_fn)
12764
12765     # Changes have been recorded, release node lock
12766     _ReleaseLocks(self, locking.LEVEL_NODE)
12767
12768     # Downgrade lock while waiting for sync
12769     self.glm.downgrade(locking.LEVEL_INSTANCE)
12770
12771     assert wipe_disks ^ (old_disk_size is None)
12772
12773     if wipe_disks:
12774       assert instance.disks[self.op.disk] == disk
12775
12776       # Wipe newly added disk space
12777       _WipeDisks(self, instance,
12778                  disks=[(self.op.disk, disk, old_disk_size)])
12779
12780     if self.op.wait_for_sync:
12781       disk_abort = not _WaitForSync(self, instance, disks=[disk])
12782       if disk_abort:
12783         self.LogWarning("Disk syncing has not returned a good status; check"
12784                         " the instance")
12785       if instance.admin_state != constants.ADMINST_UP:
12786         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12787     elif instance.admin_state != constants.ADMINST_UP:
12788       self.LogWarning("Not shutting down the disk even if the instance is"
12789                       " not supposed to be running because no wait for"
12790                       " sync mode was requested")
12791
12792     assert self.owned_locks(locking.LEVEL_NODE_RES)
12793     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12794
12795
12796 class LUInstanceQueryData(NoHooksLU):
12797   """Query runtime instance data.
12798
12799   """
12800   REQ_BGL = False
12801
12802   def ExpandNames(self):
12803     self.needed_locks = {}
12804
12805     # Use locking if requested or when non-static information is wanted
12806     if not (self.op.static or self.op.use_locking):
12807       self.LogWarning("Non-static data requested, locks need to be acquired")
12808       self.op.use_locking = True
12809
12810     if self.op.instances or not self.op.use_locking:
12811       # Expand instance names right here
12812       self.wanted_names = _GetWantedInstances(self, self.op.instances)
12813     else:
12814       # Will use acquired locks
12815       self.wanted_names = None
12816
12817     if self.op.use_locking:
12818       self.share_locks = _ShareAll()
12819
12820       if self.wanted_names is None:
12821         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12822       else:
12823         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12824
12825       self.needed_locks[locking.LEVEL_NODEGROUP] = []
12826       self.needed_locks[locking.LEVEL_NODE] = []
12827       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12828
12829   def DeclareLocks(self, level):
12830     if self.op.use_locking:
12831       if level == locking.LEVEL_NODEGROUP:
12832         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12833
12834         # Lock all groups used by instances optimistically; this requires going
12835         # via the node before it's locked, requiring verification later on
12836         self.needed_locks[locking.LEVEL_NODEGROUP] = \
12837           frozenset(group_uuid
12838                     for instance_name in owned_instances
12839                     for group_uuid in
12840                       self.cfg.GetInstanceNodeGroups(instance_name))
12841
12842       elif level == locking.LEVEL_NODE:
12843         self._LockInstancesNodes()
12844
12845   def CheckPrereq(self):
12846     """Check prerequisites.
12847
12848     This only checks the optional instance list against the existing names.
12849
12850     """
12851     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12852     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12853     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12854
12855     if self.wanted_names is None:
12856       assert self.op.use_locking, "Locking was not used"
12857       self.wanted_names = owned_instances
12858
12859     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12860
12861     if self.op.use_locking:
12862       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12863                                 None)
12864     else:
12865       assert not (owned_instances or owned_groups or owned_nodes)
12866
12867     self.wanted_instances = instances.values()
12868
12869   def _ComputeBlockdevStatus(self, node, instance, dev):
12870     """Returns the status of a block device
12871
12872     """
12873     if self.op.static or not node:
12874       return None
12875
12876     self.cfg.SetDiskID(dev, node)
12877
12878     result = self.rpc.call_blockdev_find(node, dev)
12879     if result.offline:
12880       return None
12881
12882     result.Raise("Can't compute disk status for %s" % instance.name)
12883
12884     status = result.payload
12885     if status is None:
12886       return None
12887
12888     return (status.dev_path, status.major, status.minor,
12889             status.sync_percent, status.estimated_time,
12890             status.is_degraded, status.ldisk_status)
12891
12892   def _ComputeDiskStatus(self, instance, snode, dev):
12893     """Compute block device status.
12894
12895     """
12896     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12897
12898     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12899
12900   def _ComputeDiskStatusInner(self, instance, snode, dev):
12901     """Compute block device status.
12902
12903     @attention: The device has to be annotated already.
12904
12905     """
12906     if dev.dev_type in constants.LDS_DRBD:
12907       # we change the snode then (otherwise we use the one passed in)
12908       if dev.logical_id[0] == instance.primary_node:
12909         snode = dev.logical_id[1]
12910       else:
12911         snode = dev.logical_id[0]
12912
12913     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12914                                               instance, dev)
12915     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12916
12917     if dev.children:
12918       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12919                                         instance, snode),
12920                          dev.children)
12921     else:
12922       dev_children = []
12923
12924     return {
12925       "iv_name": dev.iv_name,
12926       "dev_type": dev.dev_type,
12927       "logical_id": dev.logical_id,
12928       "physical_id": dev.physical_id,
12929       "pstatus": dev_pstatus,
12930       "sstatus": dev_sstatus,
12931       "children": dev_children,
12932       "mode": dev.mode,
12933       "size": dev.size,
12934       }
12935
12936   def Exec(self, feedback_fn):
12937     """Gather and return data"""
12938     result = {}
12939
12940     cluster = self.cfg.GetClusterInfo()
12941
12942     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12943     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12944
12945     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12946                                                  for node in nodes.values()))
12947
12948     group2name_fn = lambda uuid: groups[uuid].name
12949
12950     for instance in self.wanted_instances:
12951       pnode = nodes[instance.primary_node]
12952
12953       if self.op.static or pnode.offline:
12954         remote_state = None
12955         if pnode.offline:
12956           self.LogWarning("Primary node %s is marked offline, returning static"
12957                           " information only for instance %s" %
12958                           (pnode.name, instance.name))
12959       else:
12960         remote_info = self.rpc.call_instance_info(instance.primary_node,
12961                                                   instance.name,
12962                                                   instance.hypervisor)
12963         remote_info.Raise("Error checking node %s" % instance.primary_node)
12964         remote_info = remote_info.payload
12965         if remote_info and "state" in remote_info:
12966           remote_state = "up"
12967         else:
12968           if instance.admin_state == constants.ADMINST_UP:
12969             remote_state = "down"
12970           else:
12971             remote_state = instance.admin_state
12972
12973       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12974                   instance.disks)
12975
12976       snodes_group_uuids = [nodes[snode_name].group
12977                             for snode_name in instance.secondary_nodes]
12978
12979       result[instance.name] = {
12980         "name": instance.name,
12981         "config_state": instance.admin_state,
12982         "run_state": remote_state,
12983         "pnode": instance.primary_node,
12984         "pnode_group_uuid": pnode.group,
12985         "pnode_group_name": group2name_fn(pnode.group),
12986         "snodes": instance.secondary_nodes,
12987         "snodes_group_uuids": snodes_group_uuids,
12988         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12989         "os": instance.os,
12990         # this happens to be the same format used for hooks
12991         "nics": _NICListToTuple(self, instance.nics),
12992         "disk_template": instance.disk_template,
12993         "disks": disks,
12994         "hypervisor": instance.hypervisor,
12995         "network_port": instance.network_port,
12996         "hv_instance": instance.hvparams,
12997         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12998         "be_instance": instance.beparams,
12999         "be_actual": cluster.FillBE(instance),
13000         "os_instance": instance.osparams,
13001         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13002         "serial_no": instance.serial_no,
13003         "mtime": instance.mtime,
13004         "ctime": instance.ctime,
13005         "uuid": instance.uuid,
13006         }
13007
13008     return result
13009
13010
13011 def PrepareContainerMods(mods, private_fn):
13012   """Prepares a list of container modifications by adding a private data field.
13013
13014   @type mods: list of tuples; (operation, index, parameters)
13015   @param mods: List of modifications
13016   @type private_fn: callable or None
13017   @param private_fn: Callable for constructing a private data field for a
13018     modification
13019   @rtype: list
13020
13021   """
13022   if private_fn is None:
13023     fn = lambda: None
13024   else:
13025     fn = private_fn
13026
13027   return [(op, idx, params, fn()) for (op, idx, params) in mods]
13028
13029
13030 #: Type description for changes as returned by L{ApplyContainerMods}'s
13031 #: callbacks
13032 _TApplyContModsCbChanges = \
13033   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13034     ht.TNonEmptyString,
13035     ht.TAny,
13036     ])))
13037
13038
13039 def ApplyContainerMods(kind, container, chgdesc, mods,
13040                        create_fn, modify_fn, remove_fn):
13041   """Applies descriptions in C{mods} to C{container}.
13042
13043   @type kind: string
13044   @param kind: One-word item description
13045   @type container: list
13046   @param container: Container to modify
13047   @type chgdesc: None or list
13048   @param chgdesc: List of applied changes
13049   @type mods: list
13050   @param mods: Modifications as returned by L{PrepareContainerMods}
13051   @type create_fn: callable
13052   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13053     receives absolute item index, parameters and private data object as added
13054     by L{PrepareContainerMods}, returns tuple containing new item and changes
13055     as list
13056   @type modify_fn: callable
13057   @param modify_fn: Callback for modifying an existing item
13058     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13059     and private data object as added by L{PrepareContainerMods}, returns
13060     changes as list
13061   @type remove_fn: callable
13062   @param remove_fn: Callback on removing item; receives absolute item index,
13063     item and private data object as added by L{PrepareContainerMods}
13064
13065   """
13066   for (op, idx, params, private) in mods:
13067     if idx == -1:
13068       # Append
13069       absidx = len(container) - 1
13070     elif idx < 0:
13071       raise IndexError("Not accepting negative indices other than -1")
13072     elif idx > len(container):
13073       raise IndexError("Got %s index %s, but there are only %s" %
13074                        (kind, idx, len(container)))
13075     else:
13076       absidx = idx
13077
13078     changes = None
13079
13080     if op == constants.DDM_ADD:
13081       # Calculate where item will be added
13082       if idx == -1:
13083         addidx = len(container)
13084       else:
13085         addidx = idx
13086
13087       if create_fn is None:
13088         item = params
13089       else:
13090         (item, changes) = create_fn(addidx, params, private)
13091
13092       if idx == -1:
13093         container.append(item)
13094       else:
13095         assert idx >= 0
13096         assert idx <= len(container)
13097         # list.insert does so before the specified index
13098         container.insert(idx, item)
13099     else:
13100       # Retrieve existing item
13101       try:
13102         item = container[absidx]
13103       except IndexError:
13104         raise IndexError("Invalid %s index %s" % (kind, idx))
13105
13106       if op == constants.DDM_REMOVE:
13107         assert not params
13108
13109         if remove_fn is not None:
13110           remove_fn(absidx, item, private)
13111
13112         changes = [("%s/%s" % (kind, absidx), "remove")]
13113
13114         assert container[absidx] == item
13115         del container[absidx]
13116       elif op == constants.DDM_MODIFY:
13117         if modify_fn is not None:
13118           changes = modify_fn(absidx, item, params, private)
13119       else:
13120         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13121
13122     assert _TApplyContModsCbChanges(changes)
13123
13124     if not (chgdesc is None or changes is None):
13125       chgdesc.extend(changes)
13126
13127
13128 def _UpdateIvNames(base_index, disks):
13129   """Updates the C{iv_name} attribute of disks.
13130
13131   @type disks: list of L{objects.Disk}
13132
13133   """
13134   for (idx, disk) in enumerate(disks):
13135     disk.iv_name = "disk/%s" % (base_index + idx, )
13136
13137
13138 class _InstNicModPrivate:
13139   """Data structure for network interface modifications.
13140
13141   Used by L{LUInstanceSetParams}.
13142
13143   """
13144   def __init__(self):
13145     self.params = None
13146     self.filled = None
13147
13148
13149 class LUInstanceSetParams(LogicalUnit):
13150   """Modifies an instances's parameters.
13151
13152   """
13153   HPATH = "instance-modify"
13154   HTYPE = constants.HTYPE_INSTANCE
13155   REQ_BGL = False
13156
13157   @staticmethod
13158   def _UpgradeDiskNicMods(kind, mods, verify_fn):
13159     assert ht.TList(mods)
13160     assert not mods or len(mods[0]) in (2, 3)
13161
13162     if mods and len(mods[0]) == 2:
13163       result = []
13164
13165       addremove = 0
13166       for op, params in mods:
13167         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13168           result.append((op, -1, params))
13169           addremove += 1
13170
13171           if addremove > 1:
13172             raise errors.OpPrereqError("Only one %s add or remove operation is"
13173                                        " supported at a time" % kind,
13174                                        errors.ECODE_INVAL)
13175         else:
13176           result.append((constants.DDM_MODIFY, op, params))
13177
13178       assert verify_fn(result)
13179     else:
13180       result = mods
13181
13182     return result
13183
13184   @staticmethod
13185   def _CheckMods(kind, mods, key_types, item_fn):
13186     """Ensures requested disk/NIC modifications are valid.
13187
13188     """
13189     for (op, _, params) in mods:
13190       assert ht.TDict(params)
13191
13192       # If 'key_types' is an empty dict, we assume we have an
13193       # 'ext' template and thus do not ForceDictType
13194       if key_types:
13195         utils.ForceDictType(params, key_types)
13196
13197       if op == constants.DDM_REMOVE:
13198         if params:
13199           raise errors.OpPrereqError("No settings should be passed when"
13200                                      " removing a %s" % kind,
13201                                      errors.ECODE_INVAL)
13202       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13203         item_fn(op, params)
13204       else:
13205         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13206
13207   @staticmethod
13208   def _VerifyDiskModification(op, params):
13209     """Verifies a disk modification.
13210
13211     """
13212     if op == constants.DDM_ADD:
13213       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13214       if mode not in constants.DISK_ACCESS_SET:
13215         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13216                                    errors.ECODE_INVAL)
13217
13218       size = params.get(constants.IDISK_SIZE, None)
13219       if size is None:
13220         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13221                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
13222
13223       try:
13224         size = int(size)
13225       except (TypeError, ValueError), err:
13226         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13227                                    errors.ECODE_INVAL)
13228
13229       params[constants.IDISK_SIZE] = size
13230
13231     elif op == constants.DDM_MODIFY:
13232       if constants.IDISK_SIZE in params:
13233         raise errors.OpPrereqError("Disk size change not possible, use"
13234                                    " grow-disk", errors.ECODE_INVAL)
13235       if constants.IDISK_MODE not in params:
13236         raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13237                                    " modification supported, but missing",
13238                                    errors.ECODE_NOENT)
13239       if len(params) > 1:
13240         raise errors.OpPrereqError("Disk modification doesn't support"
13241                                    " additional arbitrary parameters",
13242                                    errors.ECODE_INVAL)
13243
13244   @staticmethod
13245   def _VerifyNicModification(op, params):
13246     """Verifies a network interface modification.
13247
13248     """
13249     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13250       ip = params.get(constants.INIC_IP, None)
13251       req_net = params.get(constants.INIC_NETWORK, None)
13252       link = params.get(constants.NIC_LINK, None)
13253       mode = params.get(constants.NIC_MODE, None)
13254       if req_net is not None:
13255         if req_net.lower() == constants.VALUE_NONE:
13256           params[constants.INIC_NETWORK] = None
13257           req_net = None
13258         elif link is not None or mode is not None:
13259           raise errors.OpPrereqError("If network is given"
13260                                      " mode or link should not",
13261                                      errors.ECODE_INVAL)
13262
13263       if op == constants.DDM_ADD:
13264         macaddr = params.get(constants.INIC_MAC, None)
13265         if macaddr is None:
13266           params[constants.INIC_MAC] = constants.VALUE_AUTO
13267
13268       if ip is not None:
13269         if ip.lower() == constants.VALUE_NONE:
13270           params[constants.INIC_IP] = None
13271         else:
13272           if ip.lower() == constants.NIC_IP_POOL:
13273             if op == constants.DDM_ADD and req_net is None:
13274               raise errors.OpPrereqError("If ip=pool, parameter network"
13275                                          " cannot be none",
13276                                          errors.ECODE_INVAL)
13277           else:
13278             if not netutils.IPAddress.IsValid(ip):
13279               raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13280                                          errors.ECODE_INVAL)
13281
13282       if constants.INIC_MAC in params:
13283         macaddr = params[constants.INIC_MAC]
13284         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13285           macaddr = utils.NormalizeAndValidateMac(macaddr)
13286
13287         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13288           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13289                                      " modifying an existing NIC",
13290                                      errors.ECODE_INVAL)
13291
13292   def CheckArguments(self):
13293     if not (self.op.nics or self.op.disks or self.op.disk_template or
13294             self.op.hvparams or self.op.beparams or self.op.os_name or
13295             self.op.offline is not None or self.op.runtime_mem):
13296       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13297
13298     if self.op.hvparams:
13299       _CheckGlobalHvParams(self.op.hvparams)
13300
13301     self.op.disks = self._UpgradeDiskNicMods(
13302       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13303     self.op.nics = self._UpgradeDiskNicMods(
13304       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13305
13306     if self.op.disks and self.op.disk_template is not None:
13307       raise errors.OpPrereqError("Disk template conversion and other disk"
13308                                  " changes not supported at the same time",
13309                                  errors.ECODE_INVAL)
13310
13311     if (self.op.disk_template and
13312         self.op.disk_template in constants.DTS_INT_MIRROR and
13313         self.op.remote_node is None):
13314       raise errors.OpPrereqError("Changing the disk template to a mirrored"
13315                                  " one requires specifying a secondary node",
13316                                  errors.ECODE_INVAL)
13317
13318     # Check NIC modifications
13319     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13320                     self._VerifyNicModification)
13321
13322   def ExpandNames(self):
13323     self._ExpandAndLockInstance()
13324     self.needed_locks[locking.LEVEL_NODEGROUP] = []
13325     # Can't even acquire node locks in shared mode as upcoming changes in
13326     # Ganeti 2.6 will start to modify the node object on disk conversion
13327     self.needed_locks[locking.LEVEL_NODE] = []
13328     self.needed_locks[locking.LEVEL_NODE_RES] = []
13329     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13330     # Look node group to look up the ipolicy
13331     self.share_locks[locking.LEVEL_NODEGROUP] = 1
13332
13333   def DeclareLocks(self, level):
13334     if level == locking.LEVEL_NODEGROUP:
13335       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13336       # Acquire locks for the instance's nodegroups optimistically. Needs
13337       # to be verified in CheckPrereq
13338       self.needed_locks[locking.LEVEL_NODEGROUP] = \
13339         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13340     elif level == locking.LEVEL_NODE:
13341       self._LockInstancesNodes()
13342       if self.op.disk_template and self.op.remote_node:
13343         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13344         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13345     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13346       # Copy node locks
13347       self.needed_locks[locking.LEVEL_NODE_RES] = \
13348         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13349
13350   def BuildHooksEnv(self):
13351     """Build hooks env.
13352
13353     This runs on the master, primary and secondaries.
13354
13355     """
13356     args = {}
13357     if constants.BE_MINMEM in self.be_new:
13358       args["minmem"] = self.be_new[constants.BE_MINMEM]
13359     if constants.BE_MAXMEM in self.be_new:
13360       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13361     if constants.BE_VCPUS in self.be_new:
13362       args["vcpus"] = self.be_new[constants.BE_VCPUS]
13363     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13364     # information at all.
13365
13366     if self._new_nics is not None:
13367       nics = []
13368
13369       for nic in self._new_nics:
13370         n = copy.deepcopy(nic)
13371         nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13372         n.nicparams = nicparams
13373         nics.append(_NICToTuple(self, n))
13374
13375       args["nics"] = nics
13376
13377     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13378     if self.op.disk_template:
13379       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13380     if self.op.runtime_mem:
13381       env["RUNTIME_MEMORY"] = self.op.runtime_mem
13382
13383     return env
13384
13385   def BuildHooksNodes(self):
13386     """Build hooks nodes.
13387
13388     """
13389     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13390     return (nl, nl)
13391
13392   def _PrepareNicModification(self, params, private, old_ip, old_net,
13393                               old_params, cluster, pnode):
13394
13395     update_params_dict = dict([(key, params[key])
13396                                for key in constants.NICS_PARAMETERS
13397                                if key in params])
13398
13399     req_link = update_params_dict.get(constants.NIC_LINK, None)
13400     req_mode = update_params_dict.get(constants.NIC_MODE, None)
13401
13402     new_net = params.get(constants.INIC_NETWORK, old_net)
13403     if new_net is not None:
13404       netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13405       if netparams is None:
13406         raise errors.OpPrereqError("No netparams found for the network"
13407                                    " %s, probably not connected" % new_net,
13408                                    errors.ECODE_INVAL)
13409       new_params = dict(netparams)
13410     else:
13411       new_params = _GetUpdatedParams(old_params, update_params_dict)
13412
13413     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13414
13415     new_filled_params = cluster.SimpleFillNIC(new_params)
13416     objects.NIC.CheckParameterSyntax(new_filled_params)
13417
13418     new_mode = new_filled_params[constants.NIC_MODE]
13419     if new_mode == constants.NIC_MODE_BRIDGED:
13420       bridge = new_filled_params[constants.NIC_LINK]
13421       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13422       if msg:
13423         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13424         if self.op.force:
13425           self.warn.append(msg)
13426         else:
13427           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13428
13429     elif new_mode == constants.NIC_MODE_ROUTED:
13430       ip = params.get(constants.INIC_IP, old_ip)
13431       if ip is None:
13432         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13433                                    " on a routed NIC", errors.ECODE_INVAL)
13434
13435     elif new_mode == constants.NIC_MODE_OVS:
13436       # TODO: check OVS link
13437       self.LogInfo("OVS links are currently not checked for correctness")
13438
13439     if constants.INIC_MAC in params:
13440       mac = params[constants.INIC_MAC]
13441       if mac is None:
13442         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13443                                    errors.ECODE_INVAL)
13444       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13445         # otherwise generate the MAC address
13446         params[constants.INIC_MAC] = \
13447           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13448       else:
13449         # or validate/reserve the current one
13450         try:
13451           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13452         except errors.ReservationError:
13453           raise errors.OpPrereqError("MAC address '%s' already in use"
13454                                      " in cluster" % mac,
13455                                      errors.ECODE_NOTUNIQUE)
13456     elif new_net != old_net:
13457
13458       def get_net_prefix(net):
13459         mac_prefix = None
13460         if net:
13461           uuid = self.cfg.LookupNetwork(net)
13462           mac_prefix = self.cfg.GetNetwork(uuid).mac_prefix
13463
13464         return mac_prefix
13465
13466       new_prefix = get_net_prefix(new_net)
13467       old_prefix = get_net_prefix(old_net)
13468       if old_prefix != new_prefix:
13469         params[constants.INIC_MAC] = \
13470           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13471
13472     #if there is a change in nic-network configuration
13473     new_ip = params.get(constants.INIC_IP, old_ip)
13474     if (new_ip, new_net) != (old_ip, old_net):
13475       if new_ip:
13476         if new_net:
13477           if new_ip.lower() == constants.NIC_IP_POOL:
13478             try:
13479               new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13480             except errors.ReservationError:
13481               raise errors.OpPrereqError("Unable to get a free IP"
13482                                          " from the address pool",
13483                                          errors.ECODE_STATE)
13484             self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13485             params[constants.INIC_IP] = new_ip
13486           elif new_ip != old_ip or new_net != old_net:
13487             try:
13488               self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13489               self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13490             except errors.ReservationError:
13491               raise errors.OpPrereqError("IP %s not available in network %s" %
13492                                          (new_ip, new_net),
13493                                          errors.ECODE_NOTUNIQUE)
13494         elif new_ip.lower() == constants.NIC_IP_POOL:
13495           raise errors.OpPrereqError("ip=pool, but no network found",
13496                                      errors.ECODE_INVAL)
13497
13498         # new net is None
13499         elif self.op.conflicts_check:
13500           _CheckForConflictingIp(self, new_ip, pnode)
13501
13502       if old_ip:
13503         if old_net:
13504           try:
13505             self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13506           except errors.AddressPoolError:
13507             logging.warning("Release IP %s not contained in network %s",
13508                             old_ip, old_net)
13509
13510     # there are no changes in (net, ip) tuple
13511     elif (old_net is not None and
13512           (req_link is not None or req_mode is not None)):
13513       raise errors.OpPrereqError("Not allowed to change link or mode of"
13514                                  " a NIC that is connected to a network",
13515                                  errors.ECODE_INVAL)
13516
13517     private.params = new_params
13518     private.filled = new_filled_params
13519
13520   def _PreCheckDiskTemplate(self, pnode_info):
13521     """CheckPrereq checks related to a new disk template."""
13522     # Arguments are passed to avoid configuration lookups
13523     instance = self.instance
13524     pnode = instance.primary_node
13525     cluster = self.cluster
13526     if instance.disk_template == self.op.disk_template:
13527       raise errors.OpPrereqError("Instance already has disk template %s" %
13528                                  instance.disk_template, errors.ECODE_INVAL)
13529
13530     if (instance.disk_template,
13531         self.op.disk_template) not in self._DISK_CONVERSIONS:
13532       raise errors.OpPrereqError("Unsupported disk template conversion from"
13533                                  " %s to %s" % (instance.disk_template,
13534                                                 self.op.disk_template),
13535                                  errors.ECODE_INVAL)
13536     _CheckInstanceState(self, instance, INSTANCE_DOWN,
13537                         msg="cannot change disk template")
13538     if self.op.disk_template in constants.DTS_INT_MIRROR:
13539       if self.op.remote_node == pnode:
13540         raise errors.OpPrereqError("Given new secondary node %s is the same"
13541                                    " as the primary node of the instance" %
13542                                    self.op.remote_node, errors.ECODE_STATE)
13543       _CheckNodeOnline(self, self.op.remote_node)
13544       _CheckNodeNotDrained(self, self.op.remote_node)
13545       # FIXME: here we assume that the old instance type is DT_PLAIN
13546       assert instance.disk_template == constants.DT_PLAIN
13547       disks = [{constants.IDISK_SIZE: d.size,
13548                 constants.IDISK_VG: d.logical_id[0]}
13549                for d in instance.disks]
13550       required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13551       _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13552
13553       snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13554       snode_group = self.cfg.GetNodeGroup(snode_info.group)
13555       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13556                                                               snode_group)
13557       _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13558                               ignore=self.op.ignore_ipolicy)
13559       if pnode_info.group != snode_info.group:
13560         self.LogWarning("The primary and secondary nodes are in two"
13561                         " different node groups; the disk parameters"
13562                         " from the first disk's node group will be"
13563                         " used")
13564
13565     if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13566       # Make sure none of the nodes require exclusive storage
13567       nodes = [pnode_info]
13568       if self.op.disk_template in constants.DTS_INT_MIRROR:
13569         assert snode_info
13570         nodes.append(snode_info)
13571       has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13572       if compat.any(map(has_es, nodes)):
13573         errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13574                   " storage is enabled" % (instance.disk_template,
13575                                            self.op.disk_template))
13576         raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13577
13578   def CheckPrereq(self):
13579     """Check prerequisites.
13580
13581     This only checks the instance list against the existing names.
13582
13583     """
13584     assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13585     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13586
13587     cluster = self.cluster = self.cfg.GetClusterInfo()
13588     assert self.instance is not None, \
13589       "Cannot retrieve locked instance %s" % self.op.instance_name
13590
13591     pnode = instance.primary_node
13592     assert pnode in self.owned_locks(locking.LEVEL_NODE)
13593     nodelist = list(instance.all_nodes)
13594     pnode_info = self.cfg.GetNodeInfo(pnode)
13595     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13596
13597     #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13598     assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13599     group_info = self.cfg.GetNodeGroup(pnode_info.group)
13600
13601     # dictionary with instance information after the modification
13602     ispec = {}
13603
13604     # Check disk modifications. This is done here and not in CheckArguments
13605     # (as with NICs), because we need to know the instance's disk template
13606     if instance.disk_template == constants.DT_EXT:
13607       self._CheckMods("disk", self.op.disks, {},
13608                       self._VerifyDiskModification)
13609     else:
13610       self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13611                       self._VerifyDiskModification)
13612
13613     # Prepare disk/NIC modifications
13614     self.diskmod = PrepareContainerMods(self.op.disks, None)
13615     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13616
13617     # Check the validity of the `provider' parameter
13618     if instance.disk_template in constants.DT_EXT:
13619       for mod in self.diskmod:
13620         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13621         if mod[0] == constants.DDM_ADD:
13622           if ext_provider is None:
13623             raise errors.OpPrereqError("Instance template is '%s' and parameter"
13624                                        " '%s' missing, during disk add" %
13625                                        (constants.DT_EXT,
13626                                         constants.IDISK_PROVIDER),
13627                                        errors.ECODE_NOENT)
13628         elif mod[0] == constants.DDM_MODIFY:
13629           if ext_provider:
13630             raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13631                                        " modification" %
13632                                        constants.IDISK_PROVIDER,
13633                                        errors.ECODE_INVAL)
13634     else:
13635       for mod in self.diskmod:
13636         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13637         if ext_provider is not None:
13638           raise errors.OpPrereqError("Parameter '%s' is only valid for"
13639                                      " instances of type '%s'" %
13640                                      (constants.IDISK_PROVIDER,
13641                                       constants.DT_EXT),
13642                                      errors.ECODE_INVAL)
13643
13644     # OS change
13645     if self.op.os_name and not self.op.force:
13646       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13647                       self.op.force_variant)
13648       instance_os = self.op.os_name
13649     else:
13650       instance_os = instance.os
13651
13652     assert not (self.op.disk_template and self.op.disks), \
13653       "Can't modify disk template and apply disk changes at the same time"
13654
13655     if self.op.disk_template:
13656       self._PreCheckDiskTemplate(pnode_info)
13657
13658     # hvparams processing
13659     if self.op.hvparams:
13660       hv_type = instance.hypervisor
13661       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13662       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13663       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13664
13665       # local check
13666       hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13667       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13668       self.hv_proposed = self.hv_new = hv_new # the new actual values
13669       self.hv_inst = i_hvdict # the new dict (without defaults)
13670     else:
13671       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13672                                               instance.hvparams)
13673       self.hv_new = self.hv_inst = {}
13674
13675     # beparams processing
13676     if self.op.beparams:
13677       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13678                                    use_none=True)
13679       objects.UpgradeBeParams(i_bedict)
13680       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13681       be_new = cluster.SimpleFillBE(i_bedict)
13682       self.be_proposed = self.be_new = be_new # the new actual values
13683       self.be_inst = i_bedict # the new dict (without defaults)
13684     else:
13685       self.be_new = self.be_inst = {}
13686       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13687     be_old = cluster.FillBE(instance)
13688
13689     # CPU param validation -- checking every time a parameter is
13690     # changed to cover all cases where either CPU mask or vcpus have
13691     # changed
13692     if (constants.BE_VCPUS in self.be_proposed and
13693         constants.HV_CPU_MASK in self.hv_proposed):
13694       cpu_list = \
13695         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13696       # Verify mask is consistent with number of vCPUs. Can skip this
13697       # test if only 1 entry in the CPU mask, which means same mask
13698       # is applied to all vCPUs.
13699       if (len(cpu_list) > 1 and
13700           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13701         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13702                                    " CPU mask [%s]" %
13703                                    (self.be_proposed[constants.BE_VCPUS],
13704                                     self.hv_proposed[constants.HV_CPU_MASK]),
13705                                    errors.ECODE_INVAL)
13706
13707       # Only perform this test if a new CPU mask is given
13708       if constants.HV_CPU_MASK in self.hv_new:
13709         # Calculate the largest CPU number requested
13710         max_requested_cpu = max(map(max, cpu_list))
13711         # Check that all of the instance's nodes have enough physical CPUs to
13712         # satisfy the requested CPU mask
13713         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13714                                 max_requested_cpu + 1, instance.hypervisor)
13715
13716     # osparams processing
13717     if self.op.osparams:
13718       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13719       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13720       self.os_inst = i_osdict # the new dict (without defaults)
13721     else:
13722       self.os_inst = {}
13723
13724     self.warn = []
13725
13726     #TODO(dynmem): do the appropriate check involving MINMEM
13727     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13728         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13729       mem_check_list = [pnode]
13730       if be_new[constants.BE_AUTO_BALANCE]:
13731         # either we changed auto_balance to yes or it was from before
13732         mem_check_list.extend(instance.secondary_nodes)
13733       instance_info = self.rpc.call_instance_info(pnode, instance.name,
13734                                                   instance.hypervisor)
13735       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13736                                          [instance.hypervisor], False)
13737       pninfo = nodeinfo[pnode]
13738       msg = pninfo.fail_msg
13739       if msg:
13740         # Assume the primary node is unreachable and go ahead
13741         self.warn.append("Can't get info from primary node %s: %s" %
13742                          (pnode, msg))
13743       else:
13744         (_, _, (pnhvinfo, )) = pninfo.payload
13745         if not isinstance(pnhvinfo.get("memory_free", None), int):
13746           self.warn.append("Node data from primary node %s doesn't contain"
13747                            " free memory information" % pnode)
13748         elif instance_info.fail_msg:
13749           self.warn.append("Can't get instance runtime information: %s" %
13750                            instance_info.fail_msg)
13751         else:
13752           if instance_info.payload:
13753             current_mem = int(instance_info.payload["memory"])
13754           else:
13755             # Assume instance not running
13756             # (there is a slight race condition here, but it's not very
13757             # probable, and we have no other way to check)
13758             # TODO: Describe race condition
13759             current_mem = 0
13760           #TODO(dynmem): do the appropriate check involving MINMEM
13761           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13762                       pnhvinfo["memory_free"])
13763           if miss_mem > 0:
13764             raise errors.OpPrereqError("This change will prevent the instance"
13765                                        " from starting, due to %d MB of memory"
13766                                        " missing on its primary node" %
13767                                        miss_mem, errors.ECODE_NORES)
13768
13769       if be_new[constants.BE_AUTO_BALANCE]:
13770         for node, nres in nodeinfo.items():
13771           if node not in instance.secondary_nodes:
13772             continue
13773           nres.Raise("Can't get info from secondary node %s" % node,
13774                      prereq=True, ecode=errors.ECODE_STATE)
13775           (_, _, (nhvinfo, )) = nres.payload
13776           if not isinstance(nhvinfo.get("memory_free", None), int):
13777             raise errors.OpPrereqError("Secondary node %s didn't return free"
13778                                        " memory information" % node,
13779                                        errors.ECODE_STATE)
13780           #TODO(dynmem): do the appropriate check involving MINMEM
13781           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13782             raise errors.OpPrereqError("This change will prevent the instance"
13783                                        " from failover to its secondary node"
13784                                        " %s, due to not enough memory" % node,
13785                                        errors.ECODE_STATE)
13786
13787     if self.op.runtime_mem:
13788       remote_info = self.rpc.call_instance_info(instance.primary_node,
13789                                                 instance.name,
13790                                                 instance.hypervisor)
13791       remote_info.Raise("Error checking node %s" % instance.primary_node)
13792       if not remote_info.payload: # not running already
13793         raise errors.OpPrereqError("Instance %s is not running" %
13794                                    instance.name, errors.ECODE_STATE)
13795
13796       current_memory = remote_info.payload["memory"]
13797       if (not self.op.force and
13798            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13799             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13800         raise errors.OpPrereqError("Instance %s must have memory between %d"
13801                                    " and %d MB of memory unless --force is"
13802                                    " given" %
13803                                    (instance.name,
13804                                     self.be_proposed[constants.BE_MINMEM],
13805                                     self.be_proposed[constants.BE_MAXMEM]),
13806                                    errors.ECODE_INVAL)
13807
13808       delta = self.op.runtime_mem - current_memory
13809       if delta > 0:
13810         _CheckNodeFreeMemory(self, instance.primary_node,
13811                              "ballooning memory for instance %s" %
13812                              instance.name, delta, instance.hypervisor)
13813
13814     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13815       raise errors.OpPrereqError("Disk operations not supported for"
13816                                  " diskless instances", errors.ECODE_INVAL)
13817
13818     def _PrepareNicCreate(_, params, private):
13819       self._PrepareNicModification(params, private, None, None,
13820                                    {}, cluster, pnode)
13821       return (None, None)
13822
13823     def _PrepareNicMod(_, nic, params, private):
13824       self._PrepareNicModification(params, private, nic.ip, nic.network,
13825                                    nic.nicparams, cluster, pnode)
13826       return None
13827
13828     def _PrepareNicRemove(_, params, __):
13829       ip = params.ip
13830       net = params.network
13831       if net is not None and ip is not None:
13832         self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13833
13834     # Verify NIC changes (operating on copy)
13835     nics = instance.nics[:]
13836     ApplyContainerMods("NIC", nics, None, self.nicmod,
13837                        _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13838     if len(nics) > constants.MAX_NICS:
13839       raise errors.OpPrereqError("Instance has too many network interfaces"
13840                                  " (%d), cannot add more" % constants.MAX_NICS,
13841                                  errors.ECODE_STATE)
13842
13843     # Verify disk changes (operating on a copy)
13844     disks = instance.disks[:]
13845     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13846     if len(disks) > constants.MAX_DISKS:
13847       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13848                                  " more" % constants.MAX_DISKS,
13849                                  errors.ECODE_STATE)
13850     disk_sizes = [disk.size for disk in instance.disks]
13851     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13852                       self.diskmod if op == constants.DDM_ADD)
13853     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13854     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13855
13856     if self.op.offline is not None and self.op.offline:
13857       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13858                           msg="can't change to offline")
13859
13860     # Pre-compute NIC changes (necessary to use result in hooks)
13861     self._nic_chgdesc = []
13862     if self.nicmod:
13863       # Operate on copies as this is still in prereq
13864       nics = [nic.Copy() for nic in instance.nics]
13865       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13866                          self._CreateNewNic, self._ApplyNicMods, None)
13867       self._new_nics = nics
13868       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13869     else:
13870       self._new_nics = None
13871       ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13872
13873     if not self.op.ignore_ipolicy:
13874       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13875                                                               group_info)
13876
13877       # Fill ispec with backend parameters
13878       ispec[constants.ISPEC_SPINDLE_USE] = \
13879         self.be_new.get(constants.BE_SPINDLE_USE, None)
13880       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13881                                                          None)
13882
13883       # Copy ispec to verify parameters with min/max values separately
13884       ispec_max = ispec.copy()
13885       ispec_max[constants.ISPEC_MEM_SIZE] = \
13886         self.be_new.get(constants.BE_MAXMEM, None)
13887       res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13888       ispec_min = ispec.copy()
13889       ispec_min[constants.ISPEC_MEM_SIZE] = \
13890         self.be_new.get(constants.BE_MINMEM, None)
13891       res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13892
13893       if (res_max or res_min):
13894         # FIXME: Improve error message by including information about whether
13895         # the upper or lower limit of the parameter fails the ipolicy.
13896         msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13897                (group_info, group_info.name,
13898                 utils.CommaJoin(set(res_max + res_min))))
13899         raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13900
13901   def _ConvertPlainToDrbd(self, feedback_fn):
13902     """Converts an instance from plain to drbd.
13903
13904     """
13905     feedback_fn("Converting template to drbd")
13906     instance = self.instance
13907     pnode = instance.primary_node
13908     snode = self.op.remote_node
13909
13910     assert instance.disk_template == constants.DT_PLAIN
13911
13912     # create a fake disk info for _GenerateDiskTemplate
13913     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13914                   constants.IDISK_VG: d.logical_id[0]}
13915                  for d in instance.disks]
13916     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13917                                       instance.name, pnode, [snode],
13918                                       disk_info, None, None, 0, feedback_fn,
13919                                       self.diskparams)
13920     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13921                                         self.diskparams)
13922     p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13923     s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13924     info = _GetInstanceInfoText(instance)
13925     feedback_fn("Creating additional volumes...")
13926     # first, create the missing data and meta devices
13927     for disk in anno_disks:
13928       # unfortunately this is... not too nice
13929       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13930                             info, True, p_excl_stor)
13931       for child in disk.children:
13932         _CreateSingleBlockDev(self, snode, instance, child, info, True,
13933                               s_excl_stor)
13934     # at this stage, all new LVs have been created, we can rename the
13935     # old ones
13936     feedback_fn("Renaming original volumes...")
13937     rename_list = [(o, n.children[0].logical_id)
13938                    for (o, n) in zip(instance.disks, new_disks)]
13939     result = self.rpc.call_blockdev_rename(pnode, rename_list)
13940     result.Raise("Failed to rename original LVs")
13941
13942     feedback_fn("Initializing DRBD devices...")
13943     # all child devices are in place, we can now create the DRBD devices
13944     for disk in anno_disks:
13945       for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13946         f_create = node == pnode
13947         _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13948                               excl_stor)
13949
13950     # at this point, the instance has been modified
13951     instance.disk_template = constants.DT_DRBD8
13952     instance.disks = new_disks
13953     self.cfg.Update(instance, feedback_fn)
13954
13955     # Release node locks while waiting for sync
13956     _ReleaseLocks(self, locking.LEVEL_NODE)
13957
13958     # disks are created, waiting for sync
13959     disk_abort = not _WaitForSync(self, instance,
13960                                   oneshot=not self.op.wait_for_sync)
13961     if disk_abort:
13962       raise errors.OpExecError("There are some degraded disks for"
13963                                " this instance, please cleanup manually")
13964
13965     # Node resource locks will be released by caller
13966
13967   def _ConvertDrbdToPlain(self, feedback_fn):
13968     """Converts an instance from drbd to plain.
13969
13970     """
13971     instance = self.instance
13972
13973     assert len(instance.secondary_nodes) == 1
13974     assert instance.disk_template == constants.DT_DRBD8
13975
13976     pnode = instance.primary_node
13977     snode = instance.secondary_nodes[0]
13978     feedback_fn("Converting template to plain")
13979
13980     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13981     new_disks = [d.children[0] for d in instance.disks]
13982
13983     # copy over size and mode
13984     for parent, child in zip(old_disks, new_disks):
13985       child.size = parent.size
13986       child.mode = parent.mode
13987
13988     # this is a DRBD disk, return its port to the pool
13989     # NOTE: this must be done right before the call to cfg.Update!
13990     for disk in old_disks:
13991       tcp_port = disk.logical_id[2]
13992       self.cfg.AddTcpUdpPort(tcp_port)
13993
13994     # update instance structure
13995     instance.disks = new_disks
13996     instance.disk_template = constants.DT_PLAIN
13997     self.cfg.Update(instance, feedback_fn)
13998
13999     # Release locks in case removing disks takes a while
14000     _ReleaseLocks(self, locking.LEVEL_NODE)
14001
14002     feedback_fn("Removing volumes on the secondary node...")
14003     for disk in old_disks:
14004       self.cfg.SetDiskID(disk, snode)
14005       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14006       if msg:
14007         self.LogWarning("Could not remove block device %s on node %s,"
14008                         " continuing anyway: %s", disk.iv_name, snode, msg)
14009
14010     feedback_fn("Removing unneeded volumes on the primary node...")
14011     for idx, disk in enumerate(old_disks):
14012       meta = disk.children[1]
14013       self.cfg.SetDiskID(meta, pnode)
14014       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14015       if msg:
14016         self.LogWarning("Could not remove metadata for disk %d on node %s,"
14017                         " continuing anyway: %s", idx, pnode, msg)
14018
14019   def _CreateNewDisk(self, idx, params, _):
14020     """Creates a new disk.
14021
14022     """
14023     instance = self.instance
14024
14025     # add a new disk
14026     if instance.disk_template in constants.DTS_FILEBASED:
14027       (file_driver, file_path) = instance.disks[0].logical_id
14028       file_path = os.path.dirname(file_path)
14029     else:
14030       file_driver = file_path = None
14031
14032     disk = \
14033       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14034                             instance.primary_node, instance.secondary_nodes,
14035                             [params], file_path, file_driver, idx,
14036                             self.Log, self.diskparams)[0]
14037
14038     info = _GetInstanceInfoText(instance)
14039
14040     logging.info("Creating volume %s for instance %s",
14041                  disk.iv_name, instance.name)
14042     # Note: this needs to be kept in sync with _CreateDisks
14043     #HARDCODE
14044     for node in instance.all_nodes:
14045       f_create = (node == instance.primary_node)
14046       try:
14047         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14048       except errors.OpExecError, err:
14049         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14050                         disk.iv_name, disk, node, err)
14051
14052     return (disk, [
14053       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14054       ])
14055
14056   @staticmethod
14057   def _ModifyDisk(idx, disk, params, _):
14058     """Modifies a disk.
14059
14060     """
14061     disk.mode = params[constants.IDISK_MODE]
14062
14063     return [
14064       ("disk.mode/%d" % idx, disk.mode),
14065       ]
14066
14067   def _RemoveDisk(self, idx, root, _):
14068     """Removes a disk.
14069
14070     """
14071     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14072     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14073       self.cfg.SetDiskID(disk, node)
14074       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14075       if msg:
14076         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14077                         " continuing anyway", idx, node, msg)
14078
14079     # if this is a DRBD disk, return its port to the pool
14080     if root.dev_type in constants.LDS_DRBD:
14081       self.cfg.AddTcpUdpPort(root.logical_id[2])
14082
14083   @staticmethod
14084   def _CreateNewNic(idx, params, private):
14085     """Creates data structure for a new network interface.
14086
14087     """
14088     mac = params[constants.INIC_MAC]
14089     ip = params.get(constants.INIC_IP, None)
14090     net = params.get(constants.INIC_NETWORK, None)
14091     #TODO: not private.filled?? can a nic have no nicparams??
14092     nicparams = private.filled
14093
14094     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
14095       ("nic.%d" % idx,
14096        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14097        (mac, ip, private.filled[constants.NIC_MODE],
14098        private.filled[constants.NIC_LINK],
14099        net)),
14100       ])
14101
14102   @staticmethod
14103   def _ApplyNicMods(idx, nic, params, private):
14104     """Modifies a network interface.
14105
14106     """
14107     changes = []
14108
14109     for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
14110       if key in params:
14111         changes.append(("nic.%s/%d" % (key, idx), params[key]))
14112         setattr(nic, key, params[key])
14113
14114     if private.filled:
14115       nic.nicparams = private.filled
14116
14117       for (key, val) in nic.nicparams.items():
14118         changes.append(("nic.%s/%d" % (key, idx), val))
14119
14120     return changes
14121
14122   def Exec(self, feedback_fn):
14123     """Modifies an instance.
14124
14125     All parameters take effect only at the next restart of the instance.
14126
14127     """
14128     # Process here the warnings from CheckPrereq, as we don't have a
14129     # feedback_fn there.
14130     # TODO: Replace with self.LogWarning
14131     for warn in self.warn:
14132       feedback_fn("WARNING: %s" % warn)
14133
14134     assert ((self.op.disk_template is None) ^
14135             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14136       "Not owning any node resource locks"
14137
14138     result = []
14139     instance = self.instance
14140
14141     # runtime memory
14142     if self.op.runtime_mem:
14143       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14144                                                      instance,
14145                                                      self.op.runtime_mem)
14146       rpcres.Raise("Cannot modify instance runtime memory")
14147       result.append(("runtime_memory", self.op.runtime_mem))
14148
14149     # Apply disk changes
14150     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14151                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14152     _UpdateIvNames(0, instance.disks)
14153
14154     if self.op.disk_template:
14155       if __debug__:
14156         check_nodes = set(instance.all_nodes)
14157         if self.op.remote_node:
14158           check_nodes.add(self.op.remote_node)
14159         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14160           owned = self.owned_locks(level)
14161           assert not (check_nodes - owned), \
14162             ("Not owning the correct locks, owning %r, expected at least %r" %
14163              (owned, check_nodes))
14164
14165       r_shut = _ShutdownInstanceDisks(self, instance)
14166       if not r_shut:
14167         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14168                                  " proceed with disk template conversion")
14169       mode = (instance.disk_template, self.op.disk_template)
14170       try:
14171         self._DISK_CONVERSIONS[mode](self, feedback_fn)
14172       except:
14173         self.cfg.ReleaseDRBDMinors(instance.name)
14174         raise
14175       result.append(("disk_template", self.op.disk_template))
14176
14177       assert instance.disk_template == self.op.disk_template, \
14178         ("Expected disk template '%s', found '%s'" %
14179          (self.op.disk_template, instance.disk_template))
14180
14181     # Release node and resource locks if there are any (they might already have
14182     # been released during disk conversion)
14183     _ReleaseLocks(self, locking.LEVEL_NODE)
14184     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14185
14186     # Apply NIC changes
14187     if self._new_nics is not None:
14188       instance.nics = self._new_nics
14189       result.extend(self._nic_chgdesc)
14190
14191     # hvparams changes
14192     if self.op.hvparams:
14193       instance.hvparams = self.hv_inst
14194       for key, val in self.op.hvparams.iteritems():
14195         result.append(("hv/%s" % key, val))
14196
14197     # beparams changes
14198     if self.op.beparams:
14199       instance.beparams = self.be_inst
14200       for key, val in self.op.beparams.iteritems():
14201         result.append(("be/%s" % key, val))
14202
14203     # OS change
14204     if self.op.os_name:
14205       instance.os = self.op.os_name
14206
14207     # osparams changes
14208     if self.op.osparams:
14209       instance.osparams = self.os_inst
14210       for key, val in self.op.osparams.iteritems():
14211         result.append(("os/%s" % key, val))
14212
14213     if self.op.offline is None:
14214       # Ignore
14215       pass
14216     elif self.op.offline:
14217       # Mark instance as offline
14218       self.cfg.MarkInstanceOffline(instance.name)
14219       result.append(("admin_state", constants.ADMINST_OFFLINE))
14220     else:
14221       # Mark instance as online, but stopped
14222       self.cfg.MarkInstanceDown(instance.name)
14223       result.append(("admin_state", constants.ADMINST_DOWN))
14224
14225     self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14226
14227     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14228                 self.owned_locks(locking.LEVEL_NODE)), \
14229       "All node locks should have been released by now"
14230
14231     return result
14232
14233   _DISK_CONVERSIONS = {
14234     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14235     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14236     }
14237
14238
14239 class LUInstanceChangeGroup(LogicalUnit):
14240   HPATH = "instance-change-group"
14241   HTYPE = constants.HTYPE_INSTANCE
14242   REQ_BGL = False
14243
14244   def ExpandNames(self):
14245     self.share_locks = _ShareAll()
14246
14247     self.needed_locks = {
14248       locking.LEVEL_NODEGROUP: [],
14249       locking.LEVEL_NODE: [],
14250       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14251       }
14252
14253     self._ExpandAndLockInstance()
14254
14255     if self.op.target_groups:
14256       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14257                                   self.op.target_groups)
14258     else:
14259       self.req_target_uuids = None
14260
14261     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14262
14263   def DeclareLocks(self, level):
14264     if level == locking.LEVEL_NODEGROUP:
14265       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14266
14267       if self.req_target_uuids:
14268         lock_groups = set(self.req_target_uuids)
14269
14270         # Lock all groups used by instance optimistically; this requires going
14271         # via the node before it's locked, requiring verification later on
14272         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14273         lock_groups.update(instance_groups)
14274       else:
14275         # No target groups, need to lock all of them
14276         lock_groups = locking.ALL_SET
14277
14278       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14279
14280     elif level == locking.LEVEL_NODE:
14281       if self.req_target_uuids:
14282         # Lock all nodes used by instances
14283         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14284         self._LockInstancesNodes()
14285
14286         # Lock all nodes in all potential target groups
14287         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14288                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14289         member_nodes = [node_name
14290                         for group in lock_groups
14291                         for node_name in self.cfg.GetNodeGroup(group).members]
14292         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14293       else:
14294         # Lock all nodes as all groups are potential targets
14295         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14296
14297   def CheckPrereq(self):
14298     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14299     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14300     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14301
14302     assert (self.req_target_uuids is None or
14303             owned_groups.issuperset(self.req_target_uuids))
14304     assert owned_instances == set([self.op.instance_name])
14305
14306     # Get instance information
14307     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14308
14309     # Check if node groups for locked instance are still correct
14310     assert owned_nodes.issuperset(self.instance.all_nodes), \
14311       ("Instance %s's nodes changed while we kept the lock" %
14312        self.op.instance_name)
14313
14314     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14315                                            owned_groups)
14316
14317     if self.req_target_uuids:
14318       # User requested specific target groups
14319       self.target_uuids = frozenset(self.req_target_uuids)
14320     else:
14321       # All groups except those used by the instance are potential targets
14322       self.target_uuids = owned_groups - inst_groups
14323
14324     conflicting_groups = self.target_uuids & inst_groups
14325     if conflicting_groups:
14326       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14327                                  " used by the instance '%s'" %
14328                                  (utils.CommaJoin(conflicting_groups),
14329                                   self.op.instance_name),
14330                                  errors.ECODE_INVAL)
14331
14332     if not self.target_uuids:
14333       raise errors.OpPrereqError("There are no possible target groups",
14334                                  errors.ECODE_INVAL)
14335
14336   def BuildHooksEnv(self):
14337     """Build hooks env.
14338
14339     """
14340     assert self.target_uuids
14341
14342     env = {
14343       "TARGET_GROUPS": " ".join(self.target_uuids),
14344       }
14345
14346     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14347
14348     return env
14349
14350   def BuildHooksNodes(self):
14351     """Build hooks nodes.
14352
14353     """
14354     mn = self.cfg.GetMasterNode()
14355     return ([mn], [mn])
14356
14357   def Exec(self, feedback_fn):
14358     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14359
14360     assert instances == [self.op.instance_name], "Instance not locked"
14361
14362     req = iallocator.IAReqGroupChange(instances=instances,
14363                                       target_groups=list(self.target_uuids))
14364     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14365
14366     ial.Run(self.op.iallocator)
14367
14368     if not ial.success:
14369       raise errors.OpPrereqError("Can't compute solution for changing group of"
14370                                  " instance '%s' using iallocator '%s': %s" %
14371                                  (self.op.instance_name, self.op.iallocator,
14372                                   ial.info), errors.ECODE_NORES)
14373
14374     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14375
14376     self.LogInfo("Iallocator returned %s job(s) for changing group of"
14377                  " instance '%s'", len(jobs), self.op.instance_name)
14378
14379     return ResultWithJobs(jobs)
14380
14381
14382 class LUBackupQuery(NoHooksLU):
14383   """Query the exports list
14384
14385   """
14386   REQ_BGL = False
14387
14388   def CheckArguments(self):
14389     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14390                              ["node", "export"], self.op.use_locking)
14391
14392   def ExpandNames(self):
14393     self.expq.ExpandNames(self)
14394
14395   def DeclareLocks(self, level):
14396     self.expq.DeclareLocks(self, level)
14397
14398   def Exec(self, feedback_fn):
14399     result = {}
14400
14401     for (node, expname) in self.expq.OldStyleQuery(self):
14402       if expname is None:
14403         result[node] = False
14404       else:
14405         result.setdefault(node, []).append(expname)
14406
14407     return result
14408
14409
14410 class _ExportQuery(_QueryBase):
14411   FIELDS = query.EXPORT_FIELDS
14412
14413   #: The node name is not a unique key for this query
14414   SORT_FIELD = "node"
14415
14416   def ExpandNames(self, lu):
14417     lu.needed_locks = {}
14418
14419     # The following variables interact with _QueryBase._GetNames
14420     if self.names:
14421       self.wanted = _GetWantedNodes(lu, self.names)
14422     else:
14423       self.wanted = locking.ALL_SET
14424
14425     self.do_locking = self.use_locking
14426
14427     if self.do_locking:
14428       lu.share_locks = _ShareAll()
14429       lu.needed_locks = {
14430         locking.LEVEL_NODE: self.wanted,
14431         }
14432
14433       if not self.names:
14434         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14435
14436   def DeclareLocks(self, lu, level):
14437     pass
14438
14439   def _GetQueryData(self, lu):
14440     """Computes the list of nodes and their attributes.
14441
14442     """
14443     # Locking is not used
14444     # TODO
14445     assert not (compat.any(lu.glm.is_owned(level)
14446                            for level in locking.LEVELS
14447                            if level != locking.LEVEL_CLUSTER) or
14448                 self.do_locking or self.use_locking)
14449
14450     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14451
14452     result = []
14453
14454     for (node, nres) in lu.rpc.call_export_list(nodes).items():
14455       if nres.fail_msg:
14456         result.append((node, None))
14457       else:
14458         result.extend((node, expname) for expname in nres.payload)
14459
14460     return result
14461
14462
14463 class LUBackupPrepare(NoHooksLU):
14464   """Prepares an instance for an export and returns useful information.
14465
14466   """
14467   REQ_BGL = False
14468
14469   def ExpandNames(self):
14470     self._ExpandAndLockInstance()
14471
14472   def CheckPrereq(self):
14473     """Check prerequisites.
14474
14475     """
14476     instance_name = self.op.instance_name
14477
14478     self.instance = self.cfg.GetInstanceInfo(instance_name)
14479     assert self.instance is not None, \
14480           "Cannot retrieve locked instance %s" % self.op.instance_name
14481     _CheckNodeOnline(self, self.instance.primary_node)
14482
14483     self._cds = _GetClusterDomainSecret()
14484
14485   def Exec(self, feedback_fn):
14486     """Prepares an instance for an export.
14487
14488     """
14489     instance = self.instance
14490
14491     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14492       salt = utils.GenerateSecret(8)
14493
14494       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14495       result = self.rpc.call_x509_cert_create(instance.primary_node,
14496                                               constants.RIE_CERT_VALIDITY)
14497       result.Raise("Can't create X509 key and certificate on %s" % result.node)
14498
14499       (name, cert_pem) = result.payload
14500
14501       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14502                                              cert_pem)
14503
14504       return {
14505         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14506         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14507                           salt),
14508         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14509         }
14510
14511     return None
14512
14513
14514 class LUBackupExport(LogicalUnit):
14515   """Export an instance to an image in the cluster.
14516
14517   """
14518   HPATH = "instance-export"
14519   HTYPE = constants.HTYPE_INSTANCE
14520   REQ_BGL = False
14521
14522   def CheckArguments(self):
14523     """Check the arguments.
14524
14525     """
14526     self.x509_key_name = self.op.x509_key_name
14527     self.dest_x509_ca_pem = self.op.destination_x509_ca
14528
14529     if self.op.mode == constants.EXPORT_MODE_REMOTE:
14530       if not self.x509_key_name:
14531         raise errors.OpPrereqError("Missing X509 key name for encryption",
14532                                    errors.ECODE_INVAL)
14533
14534       if not self.dest_x509_ca_pem:
14535         raise errors.OpPrereqError("Missing destination X509 CA",
14536                                    errors.ECODE_INVAL)
14537
14538   def ExpandNames(self):
14539     self._ExpandAndLockInstance()
14540
14541     # Lock all nodes for local exports
14542     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14543       # FIXME: lock only instance primary and destination node
14544       #
14545       # Sad but true, for now we have do lock all nodes, as we don't know where
14546       # the previous export might be, and in this LU we search for it and
14547       # remove it from its current node. In the future we could fix this by:
14548       #  - making a tasklet to search (share-lock all), then create the
14549       #    new one, then one to remove, after
14550       #  - removing the removal operation altogether
14551       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14552
14553       # Allocations should be stopped while this LU runs with node locks, but
14554       # it doesn't have to be exclusive
14555       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14556       self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14557
14558   def DeclareLocks(self, level):
14559     """Last minute lock declaration."""
14560     # All nodes are locked anyway, so nothing to do here.
14561
14562   def BuildHooksEnv(self):
14563     """Build hooks env.
14564
14565     This will run on the master, primary node and target node.
14566
14567     """
14568     env = {
14569       "EXPORT_MODE": self.op.mode,
14570       "EXPORT_NODE": self.op.target_node,
14571       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14572       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14573       # TODO: Generic function for boolean env variables
14574       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14575       }
14576
14577     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14578
14579     return env
14580
14581   def BuildHooksNodes(self):
14582     """Build hooks nodes.
14583
14584     """
14585     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14586
14587     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14588       nl.append(self.op.target_node)
14589
14590     return (nl, nl)
14591
14592   def CheckPrereq(self):
14593     """Check prerequisites.
14594
14595     This checks that the instance and node names are valid.
14596
14597     """
14598     instance_name = self.op.instance_name
14599
14600     self.instance = self.cfg.GetInstanceInfo(instance_name)
14601     assert self.instance is not None, \
14602           "Cannot retrieve locked instance %s" % self.op.instance_name
14603     _CheckNodeOnline(self, self.instance.primary_node)
14604
14605     if (self.op.remove_instance and
14606         self.instance.admin_state == constants.ADMINST_UP and
14607         not self.op.shutdown):
14608       raise errors.OpPrereqError("Can not remove instance without shutting it"
14609                                  " down before", errors.ECODE_STATE)
14610
14611     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14612       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14613       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14614       assert self.dst_node is not None
14615
14616       _CheckNodeOnline(self, self.dst_node.name)
14617       _CheckNodeNotDrained(self, self.dst_node.name)
14618
14619       self._cds = None
14620       self.dest_disk_info = None
14621       self.dest_x509_ca = None
14622
14623     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14624       self.dst_node = None
14625
14626       if len(self.op.target_node) != len(self.instance.disks):
14627         raise errors.OpPrereqError(("Received destination information for %s"
14628                                     " disks, but instance %s has %s disks") %
14629                                    (len(self.op.target_node), instance_name,
14630                                     len(self.instance.disks)),
14631                                    errors.ECODE_INVAL)
14632
14633       cds = _GetClusterDomainSecret()
14634
14635       # Check X509 key name
14636       try:
14637         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14638       except (TypeError, ValueError), err:
14639         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14640                                    errors.ECODE_INVAL)
14641
14642       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14643         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14644                                    errors.ECODE_INVAL)
14645
14646       # Load and verify CA
14647       try:
14648         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14649       except OpenSSL.crypto.Error, err:
14650         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14651                                    (err, ), errors.ECODE_INVAL)
14652
14653       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14654       if errcode is not None:
14655         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14656                                    (msg, ), errors.ECODE_INVAL)
14657
14658       self.dest_x509_ca = cert
14659
14660       # Verify target information
14661       disk_info = []
14662       for idx, disk_data in enumerate(self.op.target_node):
14663         try:
14664           (host, port, magic) = \
14665             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14666         except errors.GenericError, err:
14667           raise errors.OpPrereqError("Target info for disk %s: %s" %
14668                                      (idx, err), errors.ECODE_INVAL)
14669
14670         disk_info.append((host, port, magic))
14671
14672       assert len(disk_info) == len(self.op.target_node)
14673       self.dest_disk_info = disk_info
14674
14675     else:
14676       raise errors.ProgrammerError("Unhandled export mode %r" %
14677                                    self.op.mode)
14678
14679     # instance disk type verification
14680     # TODO: Implement export support for file-based disks
14681     for disk in self.instance.disks:
14682       if disk.dev_type == constants.LD_FILE:
14683         raise errors.OpPrereqError("Export not supported for instances with"
14684                                    " file-based disks", errors.ECODE_INVAL)
14685
14686   def _CleanupExports(self, feedback_fn):
14687     """Removes exports of current instance from all other nodes.
14688
14689     If an instance in a cluster with nodes A..D was exported to node C, its
14690     exports will be removed from the nodes A, B and D.
14691
14692     """
14693     assert self.op.mode != constants.EXPORT_MODE_REMOTE
14694
14695     nodelist = self.cfg.GetNodeList()
14696     nodelist.remove(self.dst_node.name)
14697
14698     # on one-node clusters nodelist will be empty after the removal
14699     # if we proceed the backup would be removed because OpBackupQuery
14700     # substitutes an empty list with the full cluster node list.
14701     iname = self.instance.name
14702     if nodelist:
14703       feedback_fn("Removing old exports for instance %s" % iname)
14704       exportlist = self.rpc.call_export_list(nodelist)
14705       for node in exportlist:
14706         if exportlist[node].fail_msg:
14707           continue
14708         if iname in exportlist[node].payload:
14709           msg = self.rpc.call_export_remove(node, iname).fail_msg
14710           if msg:
14711             self.LogWarning("Could not remove older export for instance %s"
14712                             " on node %s: %s", iname, node, msg)
14713
14714   def Exec(self, feedback_fn):
14715     """Export an instance to an image in the cluster.
14716
14717     """
14718     assert self.op.mode in constants.EXPORT_MODES
14719
14720     instance = self.instance
14721     src_node = instance.primary_node
14722
14723     if self.op.shutdown:
14724       # shutdown the instance, but not the disks
14725       feedback_fn("Shutting down instance %s" % instance.name)
14726       result = self.rpc.call_instance_shutdown(src_node, instance,
14727                                                self.op.shutdown_timeout)
14728       # TODO: Maybe ignore failures if ignore_remove_failures is set
14729       result.Raise("Could not shutdown instance %s on"
14730                    " node %s" % (instance.name, src_node))
14731
14732     # set the disks ID correctly since call_instance_start needs the
14733     # correct drbd minor to create the symlinks
14734     for disk in instance.disks:
14735       self.cfg.SetDiskID(disk, src_node)
14736
14737     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14738
14739     if activate_disks:
14740       # Activate the instance disks if we'exporting a stopped instance
14741       feedback_fn("Activating disks for %s" % instance.name)
14742       _StartInstanceDisks(self, instance, None)
14743
14744     try:
14745       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14746                                                      instance)
14747
14748       helper.CreateSnapshots()
14749       try:
14750         if (self.op.shutdown and
14751             instance.admin_state == constants.ADMINST_UP and
14752             not self.op.remove_instance):
14753           assert not activate_disks
14754           feedback_fn("Starting instance %s" % instance.name)
14755           result = self.rpc.call_instance_start(src_node,
14756                                                 (instance, None, None), False)
14757           msg = result.fail_msg
14758           if msg:
14759             feedback_fn("Failed to start instance: %s" % msg)
14760             _ShutdownInstanceDisks(self, instance)
14761             raise errors.OpExecError("Could not start instance: %s" % msg)
14762
14763         if self.op.mode == constants.EXPORT_MODE_LOCAL:
14764           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14765         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14766           connect_timeout = constants.RIE_CONNECT_TIMEOUT
14767           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14768
14769           (key_name, _, _) = self.x509_key_name
14770
14771           dest_ca_pem = \
14772             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14773                                             self.dest_x509_ca)
14774
14775           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14776                                                      key_name, dest_ca_pem,
14777                                                      timeouts)
14778       finally:
14779         helper.Cleanup()
14780
14781       # Check for backwards compatibility
14782       assert len(dresults) == len(instance.disks)
14783       assert compat.all(isinstance(i, bool) for i in dresults), \
14784              "Not all results are boolean: %r" % dresults
14785
14786     finally:
14787       if activate_disks:
14788         feedback_fn("Deactivating disks for %s" % instance.name)
14789         _ShutdownInstanceDisks(self, instance)
14790
14791     if not (compat.all(dresults) and fin_resu):
14792       failures = []
14793       if not fin_resu:
14794         failures.append("export finalization")
14795       if not compat.all(dresults):
14796         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14797                                if not dsk)
14798         failures.append("disk export: disk(s) %s" % fdsk)
14799
14800       raise errors.OpExecError("Export failed, errors in %s" %
14801                                utils.CommaJoin(failures))
14802
14803     # At this point, the export was successful, we can cleanup/finish
14804
14805     # Remove instance if requested
14806     if self.op.remove_instance:
14807       feedback_fn("Removing instance %s" % instance.name)
14808       _RemoveInstance(self, feedback_fn, instance,
14809                       self.op.ignore_remove_failures)
14810
14811     if self.op.mode == constants.EXPORT_MODE_LOCAL:
14812       self._CleanupExports(feedback_fn)
14813
14814     return fin_resu, dresults
14815
14816
14817 class LUBackupRemove(NoHooksLU):
14818   """Remove exports related to the named instance.
14819
14820   """
14821   REQ_BGL = False
14822
14823   def ExpandNames(self):
14824     self.needed_locks = {
14825       # We need all nodes to be locked in order for RemoveExport to work, but
14826       # we don't need to lock the instance itself, as nothing will happen to it
14827       # (and we can remove exports also for a removed instance)
14828       locking.LEVEL_NODE: locking.ALL_SET,
14829
14830       # Removing backups is quick, so blocking allocations is justified
14831       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14832       }
14833
14834     # Allocations should be stopped while this LU runs with node locks, but it
14835     # doesn't have to be exclusive
14836     self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14837
14838   def Exec(self, feedback_fn):
14839     """Remove any export.
14840
14841     """
14842     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14843     # If the instance was not found we'll try with the name that was passed in.
14844     # This will only work if it was an FQDN, though.
14845     fqdn_warn = False
14846     if not instance_name:
14847       fqdn_warn = True
14848       instance_name = self.op.instance_name
14849
14850     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14851     exportlist = self.rpc.call_export_list(locked_nodes)
14852     found = False
14853     for node in exportlist:
14854       msg = exportlist[node].fail_msg
14855       if msg:
14856         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14857         continue
14858       if instance_name in exportlist[node].payload:
14859         found = True
14860         result = self.rpc.call_export_remove(node, instance_name)
14861         msg = result.fail_msg
14862         if msg:
14863           logging.error("Could not remove export for instance %s"
14864                         " on node %s: %s", instance_name, node, msg)
14865
14866     if fqdn_warn and not found:
14867       feedback_fn("Export not found. If trying to remove an export belonging"
14868                   " to a deleted instance please use its Fully Qualified"
14869                   " Domain Name.")
14870
14871
14872 class LUGroupAdd(LogicalUnit):
14873   """Logical unit for creating node groups.
14874
14875   """
14876   HPATH = "group-add"
14877   HTYPE = constants.HTYPE_GROUP
14878   REQ_BGL = False
14879
14880   def ExpandNames(self):
14881     # We need the new group's UUID here so that we can create and acquire the
14882     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14883     # that it should not check whether the UUID exists in the configuration.
14884     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14885     self.needed_locks = {}
14886     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14887
14888   def CheckPrereq(self):
14889     """Check prerequisites.
14890
14891     This checks that the given group name is not an existing node group
14892     already.
14893
14894     """
14895     try:
14896       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14897     except errors.OpPrereqError:
14898       pass
14899     else:
14900       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14901                                  " node group (UUID: %s)" %
14902                                  (self.op.group_name, existing_uuid),
14903                                  errors.ECODE_EXISTS)
14904
14905     if self.op.ndparams:
14906       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14907
14908     if self.op.hv_state:
14909       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14910     else:
14911       self.new_hv_state = None
14912
14913     if self.op.disk_state:
14914       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14915     else:
14916       self.new_disk_state = None
14917
14918     if self.op.diskparams:
14919       for templ in constants.DISK_TEMPLATES:
14920         if templ in self.op.diskparams:
14921           utils.ForceDictType(self.op.diskparams[templ],
14922                               constants.DISK_DT_TYPES)
14923       self.new_diskparams = self.op.diskparams
14924       try:
14925         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14926       except errors.OpPrereqError, err:
14927         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14928                                    errors.ECODE_INVAL)
14929     else:
14930       self.new_diskparams = {}
14931
14932     if self.op.ipolicy:
14933       cluster = self.cfg.GetClusterInfo()
14934       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14935       try:
14936         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14937       except errors.ConfigurationError, err:
14938         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14939                                    errors.ECODE_INVAL)
14940
14941   def BuildHooksEnv(self):
14942     """Build hooks env.
14943
14944     """
14945     return {
14946       "GROUP_NAME": self.op.group_name,
14947       }
14948
14949   def BuildHooksNodes(self):
14950     """Build hooks nodes.
14951
14952     """
14953     mn = self.cfg.GetMasterNode()
14954     return ([mn], [mn])
14955
14956   def Exec(self, feedback_fn):
14957     """Add the node group to the cluster.
14958
14959     """
14960     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14961                                   uuid=self.group_uuid,
14962                                   alloc_policy=self.op.alloc_policy,
14963                                   ndparams=self.op.ndparams,
14964                                   diskparams=self.new_diskparams,
14965                                   ipolicy=self.op.ipolicy,
14966                                   hv_state_static=self.new_hv_state,
14967                                   disk_state_static=self.new_disk_state)
14968
14969     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14970     del self.remove_locks[locking.LEVEL_NODEGROUP]
14971
14972
14973 class LUGroupAssignNodes(NoHooksLU):
14974   """Logical unit for assigning nodes to groups.
14975
14976   """
14977   REQ_BGL = False
14978
14979   def ExpandNames(self):
14980     # These raise errors.OpPrereqError on their own:
14981     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14982     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14983
14984     # We want to lock all the affected nodes and groups. We have readily
14985     # available the list of nodes, and the *destination* group. To gather the
14986     # list of "source" groups, we need to fetch node information later on.
14987     self.needed_locks = {
14988       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14989       locking.LEVEL_NODE: self.op.nodes,
14990       }
14991
14992   def DeclareLocks(self, level):
14993     if level == locking.LEVEL_NODEGROUP:
14994       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14995
14996       # Try to get all affected nodes' groups without having the group or node
14997       # lock yet. Needs verification later in the code flow.
14998       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14999
15000       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15001
15002   def CheckPrereq(self):
15003     """Check prerequisites.
15004
15005     """
15006     assert self.needed_locks[locking.LEVEL_NODEGROUP]
15007     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15008             frozenset(self.op.nodes))
15009
15010     expected_locks = (set([self.group_uuid]) |
15011                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15012     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15013     if actual_locks != expected_locks:
15014       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15015                                " current groups are '%s', used to be '%s'" %
15016                                (utils.CommaJoin(expected_locks),
15017                                 utils.CommaJoin(actual_locks)))
15018
15019     self.node_data = self.cfg.GetAllNodesInfo()
15020     self.group = self.cfg.GetNodeGroup(self.group_uuid)
15021     instance_data = self.cfg.GetAllInstancesInfo()
15022
15023     if self.group is None:
15024       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15025                                (self.op.group_name, self.group_uuid))
15026
15027     (new_splits, previous_splits) = \
15028       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15029                                              for node in self.op.nodes],
15030                                             self.node_data, instance_data)
15031
15032     if new_splits:
15033       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15034
15035       if not self.op.force:
15036         raise errors.OpExecError("The following instances get split by this"
15037                                  " change and --force was not given: %s" %
15038                                  fmt_new_splits)
15039       else:
15040         self.LogWarning("This operation will split the following instances: %s",
15041                         fmt_new_splits)
15042
15043         if previous_splits:
15044           self.LogWarning("In addition, these already-split instances continue"
15045                           " to be split across groups: %s",
15046                           utils.CommaJoin(utils.NiceSort(previous_splits)))
15047
15048   def Exec(self, feedback_fn):
15049     """Assign nodes to a new group.
15050
15051     """
15052     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15053
15054     self.cfg.AssignGroupNodes(mods)
15055
15056   @staticmethod
15057   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15058     """Check for split instances after a node assignment.
15059
15060     This method considers a series of node assignments as an atomic operation,
15061     and returns information about split instances after applying the set of
15062     changes.
15063
15064     In particular, it returns information about newly split instances, and
15065     instances that were already split, and remain so after the change.
15066
15067     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15068     considered.
15069
15070     @type changes: list of (node_name, new_group_uuid) pairs.
15071     @param changes: list of node assignments to consider.
15072     @param node_data: a dict with data for all nodes
15073     @param instance_data: a dict with all instances to consider
15074     @rtype: a two-tuple
15075     @return: a list of instances that were previously okay and result split as a
15076       consequence of this change, and a list of instances that were previously
15077       split and this change does not fix.
15078
15079     """
15080     changed_nodes = dict((node, group) for node, group in changes
15081                          if node_data[node].group != group)
15082
15083     all_split_instances = set()
15084     previously_split_instances = set()
15085
15086     def InstanceNodes(instance):
15087       return [instance.primary_node] + list(instance.secondary_nodes)
15088
15089     for inst in instance_data.values():
15090       if inst.disk_template not in constants.DTS_INT_MIRROR:
15091         continue
15092
15093       instance_nodes = InstanceNodes(inst)
15094
15095       if len(set(node_data[node].group for node in instance_nodes)) > 1:
15096         previously_split_instances.add(inst.name)
15097
15098       if len(set(changed_nodes.get(node, node_data[node].group)
15099                  for node in instance_nodes)) > 1:
15100         all_split_instances.add(inst.name)
15101
15102     return (list(all_split_instances - previously_split_instances),
15103             list(previously_split_instances & all_split_instances))
15104
15105
15106 class _GroupQuery(_QueryBase):
15107   FIELDS = query.GROUP_FIELDS
15108
15109   def ExpandNames(self, lu):
15110     lu.needed_locks = {}
15111
15112     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15113     self._cluster = lu.cfg.GetClusterInfo()
15114     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15115
15116     if not self.names:
15117       self.wanted = [name_to_uuid[name]
15118                      for name in utils.NiceSort(name_to_uuid.keys())]
15119     else:
15120       # Accept names to be either names or UUIDs.
15121       missing = []
15122       self.wanted = []
15123       all_uuid = frozenset(self._all_groups.keys())
15124
15125       for name in self.names:
15126         if name in all_uuid:
15127           self.wanted.append(name)
15128         elif name in name_to_uuid:
15129           self.wanted.append(name_to_uuid[name])
15130         else:
15131           missing.append(name)
15132
15133       if missing:
15134         raise errors.OpPrereqError("Some groups do not exist: %s" %
15135                                    utils.CommaJoin(missing),
15136                                    errors.ECODE_NOENT)
15137
15138   def DeclareLocks(self, lu, level):
15139     pass
15140
15141   def _GetQueryData(self, lu):
15142     """Computes the list of node groups and their attributes.
15143
15144     """
15145     do_nodes = query.GQ_NODE in self.requested_data
15146     do_instances = query.GQ_INST in self.requested_data
15147
15148     group_to_nodes = None
15149     group_to_instances = None
15150
15151     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15152     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15153     # latter GetAllInstancesInfo() is not enough, for we have to go through
15154     # instance->node. Hence, we will need to process nodes even if we only need
15155     # instance information.
15156     if do_nodes or do_instances:
15157       all_nodes = lu.cfg.GetAllNodesInfo()
15158       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15159       node_to_group = {}
15160
15161       for node in all_nodes.values():
15162         if node.group in group_to_nodes:
15163           group_to_nodes[node.group].append(node.name)
15164           node_to_group[node.name] = node.group
15165
15166       if do_instances:
15167         all_instances = lu.cfg.GetAllInstancesInfo()
15168         group_to_instances = dict((uuid, []) for uuid in self.wanted)
15169
15170         for instance in all_instances.values():
15171           node = instance.primary_node
15172           if node in node_to_group:
15173             group_to_instances[node_to_group[node]].append(instance.name)
15174
15175         if not do_nodes:
15176           # Do not pass on node information if it was not requested.
15177           group_to_nodes = None
15178
15179     return query.GroupQueryData(self._cluster,
15180                                 [self._all_groups[uuid]
15181                                  for uuid in self.wanted],
15182                                 group_to_nodes, group_to_instances,
15183                                 query.GQ_DISKPARAMS in self.requested_data)
15184
15185
15186 class LUGroupQuery(NoHooksLU):
15187   """Logical unit for querying node groups.
15188
15189   """
15190   REQ_BGL = False
15191
15192   def CheckArguments(self):
15193     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15194                           self.op.output_fields, False)
15195
15196   def ExpandNames(self):
15197     self.gq.ExpandNames(self)
15198
15199   def DeclareLocks(self, level):
15200     self.gq.DeclareLocks(self, level)
15201
15202   def Exec(self, feedback_fn):
15203     return self.gq.OldStyleQuery(self)
15204
15205
15206 class LUGroupSetParams(LogicalUnit):
15207   """Modifies the parameters of a node group.
15208
15209   """
15210   HPATH = "group-modify"
15211   HTYPE = constants.HTYPE_GROUP
15212   REQ_BGL = False
15213
15214   def CheckArguments(self):
15215     all_changes = [
15216       self.op.ndparams,
15217       self.op.diskparams,
15218       self.op.alloc_policy,
15219       self.op.hv_state,
15220       self.op.disk_state,
15221       self.op.ipolicy,
15222       ]
15223
15224     if all_changes.count(None) == len(all_changes):
15225       raise errors.OpPrereqError("Please pass at least one modification",
15226                                  errors.ECODE_INVAL)
15227
15228   def ExpandNames(self):
15229     # This raises errors.OpPrereqError on its own:
15230     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15231
15232     self.needed_locks = {
15233       locking.LEVEL_INSTANCE: [],
15234       locking.LEVEL_NODEGROUP: [self.group_uuid],
15235       }
15236
15237     self.share_locks[locking.LEVEL_INSTANCE] = 1
15238
15239   def DeclareLocks(self, level):
15240     if level == locking.LEVEL_INSTANCE:
15241       assert not self.needed_locks[locking.LEVEL_INSTANCE]
15242
15243       # Lock instances optimistically, needs verification once group lock has
15244       # been acquired
15245       self.needed_locks[locking.LEVEL_INSTANCE] = \
15246           self.cfg.GetNodeGroupInstances(self.group_uuid)
15247
15248   @staticmethod
15249   def _UpdateAndVerifyDiskParams(old, new):
15250     """Updates and verifies disk parameters.
15251
15252     """
15253     new_params = _GetUpdatedParams(old, new)
15254     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15255     return new_params
15256
15257   def CheckPrereq(self):
15258     """Check prerequisites.
15259
15260     """
15261     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15262
15263     # Check if locked instances are still correct
15264     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15265
15266     self.group = self.cfg.GetNodeGroup(self.group_uuid)
15267     cluster = self.cfg.GetClusterInfo()
15268
15269     if self.group is None:
15270       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15271                                (self.op.group_name, self.group_uuid))
15272
15273     if self.op.ndparams:
15274       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15275       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15276       self.new_ndparams = new_ndparams
15277
15278     if self.op.diskparams:
15279       diskparams = self.group.diskparams
15280       uavdp = self._UpdateAndVerifyDiskParams
15281       # For each disktemplate subdict update and verify the values
15282       new_diskparams = dict((dt,
15283                              uavdp(diskparams.get(dt, {}),
15284                                    self.op.diskparams[dt]))
15285                             for dt in constants.DISK_TEMPLATES
15286                             if dt in self.op.diskparams)
15287       # As we've all subdicts of diskparams ready, lets merge the actual
15288       # dict with all updated subdicts
15289       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15290       try:
15291         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15292       except errors.OpPrereqError, err:
15293         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15294                                    errors.ECODE_INVAL)
15295
15296     if self.op.hv_state:
15297       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15298                                                  self.group.hv_state_static)
15299
15300     if self.op.disk_state:
15301       self.new_disk_state = \
15302         _MergeAndVerifyDiskState(self.op.disk_state,
15303                                  self.group.disk_state_static)
15304
15305     if self.op.ipolicy:
15306       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15307                                             self.op.ipolicy,
15308                                             group_policy=True)
15309
15310       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15311       inst_filter = lambda inst: inst.name in owned_instances
15312       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15313       gmi = ganeti.masterd.instance
15314       violations = \
15315           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15316                                                                   self.group),
15317                                         new_ipolicy, instances)
15318
15319       if violations:
15320         self.LogWarning("After the ipolicy change the following instances"
15321                         " violate them: %s",
15322                         utils.CommaJoin(violations))
15323
15324   def BuildHooksEnv(self):
15325     """Build hooks env.
15326
15327     """
15328     return {
15329       "GROUP_NAME": self.op.group_name,
15330       "NEW_ALLOC_POLICY": self.op.alloc_policy,
15331       }
15332
15333   def BuildHooksNodes(self):
15334     """Build hooks nodes.
15335
15336     """
15337     mn = self.cfg.GetMasterNode()
15338     return ([mn], [mn])
15339
15340   def Exec(self, feedback_fn):
15341     """Modifies the node group.
15342
15343     """
15344     result = []
15345
15346     if self.op.ndparams:
15347       self.group.ndparams = self.new_ndparams
15348       result.append(("ndparams", str(self.group.ndparams)))
15349
15350     if self.op.diskparams:
15351       self.group.diskparams = self.new_diskparams
15352       result.append(("diskparams", str(self.group.diskparams)))
15353
15354     if self.op.alloc_policy:
15355       self.group.alloc_policy = self.op.alloc_policy
15356
15357     if self.op.hv_state:
15358       self.group.hv_state_static = self.new_hv_state
15359
15360     if self.op.disk_state:
15361       self.group.disk_state_static = self.new_disk_state
15362
15363     if self.op.ipolicy:
15364       self.group.ipolicy = self.new_ipolicy
15365
15366     self.cfg.Update(self.group, feedback_fn)
15367     return result
15368
15369
15370 class LUGroupRemove(LogicalUnit):
15371   HPATH = "group-remove"
15372   HTYPE = constants.HTYPE_GROUP
15373   REQ_BGL = False
15374
15375   def ExpandNames(self):
15376     # This will raises errors.OpPrereqError on its own:
15377     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15378     self.needed_locks = {
15379       locking.LEVEL_NODEGROUP: [self.group_uuid],
15380       }
15381
15382   def CheckPrereq(self):
15383     """Check prerequisites.
15384
15385     This checks that the given group name exists as a node group, that is
15386     empty (i.e., contains no nodes), and that is not the last group of the
15387     cluster.
15388
15389     """
15390     # Verify that the group is empty.
15391     group_nodes = [node.name
15392                    for node in self.cfg.GetAllNodesInfo().values()
15393                    if node.group == self.group_uuid]
15394
15395     if group_nodes:
15396       raise errors.OpPrereqError("Group '%s' not empty, has the following"
15397                                  " nodes: %s" %
15398                                  (self.op.group_name,
15399                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
15400                                  errors.ECODE_STATE)
15401
15402     # Verify the cluster would not be left group-less.
15403     if len(self.cfg.GetNodeGroupList()) == 1:
15404       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15405                                  " removed" % self.op.group_name,
15406                                  errors.ECODE_STATE)
15407
15408   def BuildHooksEnv(self):
15409     """Build hooks env.
15410
15411     """
15412     return {
15413       "GROUP_NAME": self.op.group_name,
15414       }
15415
15416   def BuildHooksNodes(self):
15417     """Build hooks nodes.
15418
15419     """
15420     mn = self.cfg.GetMasterNode()
15421     return ([mn], [mn])
15422
15423   def Exec(self, feedback_fn):
15424     """Remove the node group.
15425
15426     """
15427     try:
15428       self.cfg.RemoveNodeGroup(self.group_uuid)
15429     except errors.ConfigurationError:
15430       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15431                                (self.op.group_name, self.group_uuid))
15432
15433     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15434
15435
15436 class LUGroupRename(LogicalUnit):
15437   HPATH = "group-rename"
15438   HTYPE = constants.HTYPE_GROUP
15439   REQ_BGL = False
15440
15441   def ExpandNames(self):
15442     # This raises errors.OpPrereqError on its own:
15443     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15444
15445     self.needed_locks = {
15446       locking.LEVEL_NODEGROUP: [self.group_uuid],
15447       }
15448
15449   def CheckPrereq(self):
15450     """Check prerequisites.
15451
15452     Ensures requested new name is not yet used.
15453
15454     """
15455     try:
15456       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15457     except errors.OpPrereqError:
15458       pass
15459     else:
15460       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15461                                  " node group (UUID: %s)" %
15462                                  (self.op.new_name, new_name_uuid),
15463                                  errors.ECODE_EXISTS)
15464
15465   def BuildHooksEnv(self):
15466     """Build hooks env.
15467
15468     """
15469     return {
15470       "OLD_NAME": self.op.group_name,
15471       "NEW_NAME": self.op.new_name,
15472       }
15473
15474   def BuildHooksNodes(self):
15475     """Build hooks nodes.
15476
15477     """
15478     mn = self.cfg.GetMasterNode()
15479
15480     all_nodes = self.cfg.GetAllNodesInfo()
15481     all_nodes.pop(mn, None)
15482
15483     run_nodes = [mn]
15484     run_nodes.extend(node.name for node in all_nodes.values()
15485                      if node.group == self.group_uuid)
15486
15487     return (run_nodes, run_nodes)
15488
15489   def Exec(self, feedback_fn):
15490     """Rename the node group.
15491
15492     """
15493     group = self.cfg.GetNodeGroup(self.group_uuid)
15494
15495     if group is None:
15496       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15497                                (self.op.group_name, self.group_uuid))
15498
15499     group.name = self.op.new_name
15500     self.cfg.Update(group, feedback_fn)
15501
15502     return self.op.new_name
15503
15504
15505 class LUGroupEvacuate(LogicalUnit):
15506   HPATH = "group-evacuate"
15507   HTYPE = constants.HTYPE_GROUP
15508   REQ_BGL = False
15509
15510   def ExpandNames(self):
15511     # This raises errors.OpPrereqError on its own:
15512     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15513
15514     if self.op.target_groups:
15515       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15516                                   self.op.target_groups)
15517     else:
15518       self.req_target_uuids = []
15519
15520     if self.group_uuid in self.req_target_uuids:
15521       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15522                                  " as a target group (targets are %s)" %
15523                                  (self.group_uuid,
15524                                   utils.CommaJoin(self.req_target_uuids)),
15525                                  errors.ECODE_INVAL)
15526
15527     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15528
15529     self.share_locks = _ShareAll()
15530     self.needed_locks = {
15531       locking.LEVEL_INSTANCE: [],
15532       locking.LEVEL_NODEGROUP: [],
15533       locking.LEVEL_NODE: [],
15534       }
15535
15536   def DeclareLocks(self, level):
15537     if level == locking.LEVEL_INSTANCE:
15538       assert not self.needed_locks[locking.LEVEL_INSTANCE]
15539
15540       # Lock instances optimistically, needs verification once node and group
15541       # locks have been acquired
15542       self.needed_locks[locking.LEVEL_INSTANCE] = \
15543         self.cfg.GetNodeGroupInstances(self.group_uuid)
15544
15545     elif level == locking.LEVEL_NODEGROUP:
15546       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15547
15548       if self.req_target_uuids:
15549         lock_groups = set([self.group_uuid] + self.req_target_uuids)
15550
15551         # Lock all groups used by instances optimistically; this requires going
15552         # via the node before it's locked, requiring verification later on
15553         lock_groups.update(group_uuid
15554                            for instance_name in
15555                              self.owned_locks(locking.LEVEL_INSTANCE)
15556                            for group_uuid in
15557                              self.cfg.GetInstanceNodeGroups(instance_name))
15558       else:
15559         # No target groups, need to lock all of them
15560         lock_groups = locking.ALL_SET
15561
15562       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15563
15564     elif level == locking.LEVEL_NODE:
15565       # This will only lock the nodes in the group to be evacuated which
15566       # contain actual instances
15567       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15568       self._LockInstancesNodes()
15569
15570       # Lock all nodes in group to be evacuated and target groups
15571       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15572       assert self.group_uuid in owned_groups
15573       member_nodes = [node_name
15574                       for group in owned_groups
15575                       for node_name in self.cfg.GetNodeGroup(group).members]
15576       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15577
15578   def CheckPrereq(self):
15579     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15580     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15581     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15582
15583     assert owned_groups.issuperset(self.req_target_uuids)
15584     assert self.group_uuid in owned_groups
15585
15586     # Check if locked instances are still correct
15587     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15588
15589     # Get instance information
15590     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15591
15592     # Check if node groups for locked instances are still correct
15593     _CheckInstancesNodeGroups(self.cfg, self.instances,
15594                               owned_groups, owned_nodes, self.group_uuid)
15595
15596     if self.req_target_uuids:
15597       # User requested specific target groups
15598       self.target_uuids = self.req_target_uuids
15599     else:
15600       # All groups except the one to be evacuated are potential targets
15601       self.target_uuids = [group_uuid for group_uuid in owned_groups
15602                            if group_uuid != self.group_uuid]
15603
15604       if not self.target_uuids:
15605         raise errors.OpPrereqError("There are no possible target groups",
15606                                    errors.ECODE_INVAL)
15607
15608   def BuildHooksEnv(self):
15609     """Build hooks env.
15610
15611     """
15612     return {
15613       "GROUP_NAME": self.op.group_name,
15614       "TARGET_GROUPS": " ".join(self.target_uuids),
15615       }
15616
15617   def BuildHooksNodes(self):
15618     """Build hooks nodes.
15619
15620     """
15621     mn = self.cfg.GetMasterNode()
15622
15623     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15624
15625     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15626
15627     return (run_nodes, run_nodes)
15628
15629   def Exec(self, feedback_fn):
15630     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15631
15632     assert self.group_uuid not in self.target_uuids
15633
15634     req = iallocator.IAReqGroupChange(instances=instances,
15635                                       target_groups=self.target_uuids)
15636     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15637
15638     ial.Run(self.op.iallocator)
15639
15640     if not ial.success:
15641       raise errors.OpPrereqError("Can't compute group evacuation using"
15642                                  " iallocator '%s': %s" %
15643                                  (self.op.iallocator, ial.info),
15644                                  errors.ECODE_NORES)
15645
15646     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15647
15648     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15649                  len(jobs), self.op.group_name)
15650
15651     return ResultWithJobs(jobs)
15652
15653
15654 class TagsLU(NoHooksLU): # pylint: disable=W0223
15655   """Generic tags LU.
15656
15657   This is an abstract class which is the parent of all the other tags LUs.
15658
15659   """
15660   def ExpandNames(self):
15661     self.group_uuid = None
15662     self.needed_locks = {}
15663
15664     if self.op.kind == constants.TAG_NODE:
15665       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15666       lock_level = locking.LEVEL_NODE
15667       lock_name = self.op.name
15668     elif self.op.kind == constants.TAG_INSTANCE:
15669       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15670       lock_level = locking.LEVEL_INSTANCE
15671       lock_name = self.op.name
15672     elif self.op.kind == constants.TAG_NODEGROUP:
15673       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15674       lock_level = locking.LEVEL_NODEGROUP
15675       lock_name = self.group_uuid
15676     elif self.op.kind == constants.TAG_NETWORK:
15677       self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15678       lock_level = locking.LEVEL_NETWORK
15679       lock_name = self.network_uuid
15680     else:
15681       lock_level = None
15682       lock_name = None
15683
15684     if lock_level and getattr(self.op, "use_locking", True):
15685       self.needed_locks[lock_level] = lock_name
15686
15687     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15688     # not possible to acquire the BGL based on opcode parameters)
15689
15690   def CheckPrereq(self):
15691     """Check prerequisites.
15692
15693     """
15694     if self.op.kind == constants.TAG_CLUSTER:
15695       self.target = self.cfg.GetClusterInfo()
15696     elif self.op.kind == constants.TAG_NODE:
15697       self.target = self.cfg.GetNodeInfo(self.op.name)
15698     elif self.op.kind == constants.TAG_INSTANCE:
15699       self.target = self.cfg.GetInstanceInfo(self.op.name)
15700     elif self.op.kind == constants.TAG_NODEGROUP:
15701       self.target = self.cfg.GetNodeGroup(self.group_uuid)
15702     elif self.op.kind == constants.TAG_NETWORK:
15703       self.target = self.cfg.GetNetwork(self.network_uuid)
15704     else:
15705       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15706                                  str(self.op.kind), errors.ECODE_INVAL)
15707
15708
15709 class LUTagsGet(TagsLU):
15710   """Returns the tags of a given object.
15711
15712   """
15713   REQ_BGL = False
15714
15715   def ExpandNames(self):
15716     TagsLU.ExpandNames(self)
15717
15718     # Share locks as this is only a read operation
15719     self.share_locks = _ShareAll()
15720
15721   def Exec(self, feedback_fn):
15722     """Returns the tag list.
15723
15724     """
15725     return list(self.target.GetTags())
15726
15727
15728 class LUTagsSearch(NoHooksLU):
15729   """Searches the tags for a given pattern.
15730
15731   """
15732   REQ_BGL = False
15733
15734   def ExpandNames(self):
15735     self.needed_locks = {}
15736
15737   def CheckPrereq(self):
15738     """Check prerequisites.
15739
15740     This checks the pattern passed for validity by compiling it.
15741
15742     """
15743     try:
15744       self.re = re.compile(self.op.pattern)
15745     except re.error, err:
15746       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15747                                  (self.op.pattern, err), errors.ECODE_INVAL)
15748
15749   def Exec(self, feedback_fn):
15750     """Returns the tag list.
15751
15752     """
15753     cfg = self.cfg
15754     tgts = [("/cluster", cfg.GetClusterInfo())]
15755     ilist = cfg.GetAllInstancesInfo().values()
15756     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15757     nlist = cfg.GetAllNodesInfo().values()
15758     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15759     tgts.extend(("/nodegroup/%s" % n.name, n)
15760                 for n in cfg.GetAllNodeGroupsInfo().values())
15761     results = []
15762     for path, target in tgts:
15763       for tag in target.GetTags():
15764         if self.re.search(tag):
15765           results.append((path, tag))
15766     return results
15767
15768
15769 class LUTagsSet(TagsLU):
15770   """Sets a tag on a given object.
15771
15772   """
15773   REQ_BGL = False
15774
15775   def CheckPrereq(self):
15776     """Check prerequisites.
15777
15778     This checks the type and length of the tag name and value.
15779
15780     """
15781     TagsLU.CheckPrereq(self)
15782     for tag in self.op.tags:
15783       objects.TaggableObject.ValidateTag(tag)
15784
15785   def Exec(self, feedback_fn):
15786     """Sets the tag.
15787
15788     """
15789     try:
15790       for tag in self.op.tags:
15791         self.target.AddTag(tag)
15792     except errors.TagError, err:
15793       raise errors.OpExecError("Error while setting tag: %s" % str(err))
15794     self.cfg.Update(self.target, feedback_fn)
15795
15796
15797 class LUTagsDel(TagsLU):
15798   """Delete a list of tags from a given object.
15799
15800   """
15801   REQ_BGL = False
15802
15803   def CheckPrereq(self):
15804     """Check prerequisites.
15805
15806     This checks that we have the given tag.
15807
15808     """
15809     TagsLU.CheckPrereq(self)
15810     for tag in self.op.tags:
15811       objects.TaggableObject.ValidateTag(tag)
15812     del_tags = frozenset(self.op.tags)
15813     cur_tags = self.target.GetTags()
15814
15815     diff_tags = del_tags - cur_tags
15816     if diff_tags:
15817       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15818       raise errors.OpPrereqError("Tag(s) %s not found" %
15819                                  (utils.CommaJoin(diff_names), ),
15820                                  errors.ECODE_NOENT)
15821
15822   def Exec(self, feedback_fn):
15823     """Remove the tag from the object.
15824
15825     """
15826     for tag in self.op.tags:
15827       self.target.RemoveTag(tag)
15828     self.cfg.Update(self.target, feedback_fn)
15829
15830
15831 class LUTestDelay(NoHooksLU):
15832   """Sleep for a specified amount of time.
15833
15834   This LU sleeps on the master and/or nodes for a specified amount of
15835   time.
15836
15837   """
15838   REQ_BGL = False
15839
15840   def ExpandNames(self):
15841     """Expand names and set required locks.
15842
15843     This expands the node list, if any.
15844
15845     """
15846     self.needed_locks = {}
15847     if self.op.on_nodes:
15848       # _GetWantedNodes can be used here, but is not always appropriate to use
15849       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15850       # more information.
15851       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15852       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15853
15854   def _TestDelay(self):
15855     """Do the actual sleep.
15856
15857     """
15858     if self.op.on_master:
15859       if not utils.TestDelay(self.op.duration):
15860         raise errors.OpExecError("Error during master delay test")
15861     if self.op.on_nodes:
15862       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15863       for node, node_result in result.items():
15864         node_result.Raise("Failure during rpc call to node %s" % node)
15865
15866   def Exec(self, feedback_fn):
15867     """Execute the test delay opcode, with the wanted repetitions.
15868
15869     """
15870     if self.op.repeat == 0:
15871       self._TestDelay()
15872     else:
15873       top_value = self.op.repeat - 1
15874       for i in range(self.op.repeat):
15875         self.LogInfo("Test delay iteration %d/%d", i, top_value)
15876         self._TestDelay()
15877
15878
15879 class LURestrictedCommand(NoHooksLU):
15880   """Logical unit for executing restricted commands.
15881
15882   """
15883   REQ_BGL = False
15884
15885   def ExpandNames(self):
15886     if self.op.nodes:
15887       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15888
15889     self.needed_locks = {
15890       locking.LEVEL_NODE: self.op.nodes,
15891       }
15892     self.share_locks = {
15893       locking.LEVEL_NODE: not self.op.use_locking,
15894       }
15895
15896   def CheckPrereq(self):
15897     """Check prerequisites.
15898
15899     """
15900
15901   def Exec(self, feedback_fn):
15902     """Execute restricted command and return output.
15903
15904     """
15905     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15906
15907     # Check if correct locks are held
15908     assert set(self.op.nodes).issubset(owned_nodes)
15909
15910     rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15911
15912     result = []
15913
15914     for node_name in self.op.nodes:
15915       nres = rpcres[node_name]
15916       if nres.fail_msg:
15917         msg = ("Command '%s' on node '%s' failed: %s" %
15918                (self.op.command, node_name, nres.fail_msg))
15919         result.append((False, msg))
15920       else:
15921         result.append((True, nres.payload))
15922
15923     return result
15924
15925
15926 class LUTestJqueue(NoHooksLU):
15927   """Utility LU to test some aspects of the job queue.
15928
15929   """
15930   REQ_BGL = False
15931
15932   # Must be lower than default timeout for WaitForJobChange to see whether it
15933   # notices changed jobs
15934   _CLIENT_CONNECT_TIMEOUT = 20.0
15935   _CLIENT_CONFIRM_TIMEOUT = 60.0
15936
15937   @classmethod
15938   def _NotifyUsingSocket(cls, cb, errcls):
15939     """Opens a Unix socket and waits for another program to connect.
15940
15941     @type cb: callable
15942     @param cb: Callback to send socket name to client
15943     @type errcls: class
15944     @param errcls: Exception class to use for errors
15945
15946     """
15947     # Using a temporary directory as there's no easy way to create temporary
15948     # sockets without writing a custom loop around tempfile.mktemp and
15949     # socket.bind
15950     tmpdir = tempfile.mkdtemp()
15951     try:
15952       tmpsock = utils.PathJoin(tmpdir, "sock")
15953
15954       logging.debug("Creating temporary socket at %s", tmpsock)
15955       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15956       try:
15957         sock.bind(tmpsock)
15958         sock.listen(1)
15959
15960         # Send details to client
15961         cb(tmpsock)
15962
15963         # Wait for client to connect before continuing
15964         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15965         try:
15966           (conn, _) = sock.accept()
15967         except socket.error, err:
15968           raise errcls("Client didn't connect in time (%s)" % err)
15969       finally:
15970         sock.close()
15971     finally:
15972       # Remove as soon as client is connected
15973       shutil.rmtree(tmpdir)
15974
15975     # Wait for client to close
15976     try:
15977       try:
15978         # pylint: disable=E1101
15979         # Instance of '_socketobject' has no ... member
15980         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15981         conn.recv(1)
15982       except socket.error, err:
15983         raise errcls("Client failed to confirm notification (%s)" % err)
15984     finally:
15985       conn.close()
15986
15987   def _SendNotification(self, test, arg, sockname):
15988     """Sends a notification to the client.
15989
15990     @type test: string
15991     @param test: Test name
15992     @param arg: Test argument (depends on test)
15993     @type sockname: string
15994     @param sockname: Socket path
15995
15996     """
15997     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15998
15999   def _Notify(self, prereq, test, arg):
16000     """Notifies the client of a test.
16001
16002     @type prereq: bool
16003     @param prereq: Whether this is a prereq-phase test
16004     @type test: string
16005     @param test: Test name
16006     @param arg: Test argument (depends on test)
16007
16008     """
16009     if prereq:
16010       errcls = errors.OpPrereqError
16011     else:
16012       errcls = errors.OpExecError
16013
16014     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16015                                                   test, arg),
16016                                    errcls)
16017
16018   def CheckArguments(self):
16019     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16020     self.expandnames_calls = 0
16021
16022   def ExpandNames(self):
16023     checkargs_calls = getattr(self, "checkargs_calls", 0)
16024     if checkargs_calls < 1:
16025       raise errors.ProgrammerError("CheckArguments was not called")
16026
16027     self.expandnames_calls += 1
16028
16029     if self.op.notify_waitlock:
16030       self._Notify(True, constants.JQT_EXPANDNAMES, None)
16031
16032     self.LogInfo("Expanding names")
16033
16034     # Get lock on master node (just to get a lock, not for a particular reason)
16035     self.needed_locks = {
16036       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16037       }
16038
16039   def Exec(self, feedback_fn):
16040     if self.expandnames_calls < 1:
16041       raise errors.ProgrammerError("ExpandNames was not called")
16042
16043     if self.op.notify_exec:
16044       self._Notify(False, constants.JQT_EXEC, None)
16045
16046     self.LogInfo("Executing")
16047
16048     if self.op.log_messages:
16049       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16050       for idx, msg in enumerate(self.op.log_messages):
16051         self.LogInfo("Sending log message %s", idx + 1)
16052         feedback_fn(constants.JQT_MSGPREFIX + msg)
16053         # Report how many test messages have been sent
16054         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16055
16056     if self.op.fail:
16057       raise errors.OpExecError("Opcode failure was requested")
16058
16059     return True
16060
16061
16062 class LUTestAllocator(NoHooksLU):
16063   """Run allocator tests.
16064
16065   This LU runs the allocator tests
16066
16067   """
16068   def CheckPrereq(self):
16069     """Check prerequisites.
16070
16071     This checks the opcode parameters depending on the director and mode test.
16072
16073     """
16074     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16075                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
16076       for attr in ["memory", "disks", "disk_template",
16077                    "os", "tags", "nics", "vcpus"]:
16078         if not hasattr(self.op, attr):
16079           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16080                                      attr, errors.ECODE_INVAL)
16081       iname = self.cfg.ExpandInstanceName(self.op.name)
16082       if iname is not None:
16083         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16084                                    iname, errors.ECODE_EXISTS)
16085       if not isinstance(self.op.nics, list):
16086         raise errors.OpPrereqError("Invalid parameter 'nics'",
16087                                    errors.ECODE_INVAL)
16088       if not isinstance(self.op.disks, list):
16089         raise errors.OpPrereqError("Invalid parameter 'disks'",
16090                                    errors.ECODE_INVAL)
16091       for row in self.op.disks:
16092         if (not isinstance(row, dict) or
16093             constants.IDISK_SIZE not in row or
16094             not isinstance(row[constants.IDISK_SIZE], int) or
16095             constants.IDISK_MODE not in row or
16096             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16097           raise errors.OpPrereqError("Invalid contents of the 'disks'"
16098                                      " parameter", errors.ECODE_INVAL)
16099       if self.op.hypervisor is None:
16100         self.op.hypervisor = self.cfg.GetHypervisorType()
16101     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16102       fname = _ExpandInstanceName(self.cfg, self.op.name)
16103       self.op.name = fname
16104       self.relocate_from = \
16105           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16106     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16107                           constants.IALLOCATOR_MODE_NODE_EVAC):
16108       if not self.op.instances:
16109         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16110       self.op.instances = _GetWantedInstances(self, self.op.instances)
16111     else:
16112       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16113                                  self.op.mode, errors.ECODE_INVAL)
16114
16115     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16116       if self.op.iallocator is None:
16117         raise errors.OpPrereqError("Missing allocator name",
16118                                    errors.ECODE_INVAL)
16119     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16120       raise errors.OpPrereqError("Wrong allocator test '%s'" %
16121                                  self.op.direction, errors.ECODE_INVAL)
16122
16123   def Exec(self, feedback_fn):
16124     """Run the allocator test.
16125
16126     """
16127     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16128       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16129                                           memory=self.op.memory,
16130                                           disks=self.op.disks,
16131                                           disk_template=self.op.disk_template,
16132                                           os=self.op.os,
16133                                           tags=self.op.tags,
16134                                           nics=self.op.nics,
16135                                           vcpus=self.op.vcpus,
16136                                           spindle_use=self.op.spindle_use,
16137                                           hypervisor=self.op.hypervisor)
16138     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16139       req = iallocator.IAReqRelocate(name=self.op.name,
16140                                      relocate_from=list(self.relocate_from))
16141     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16142       req = iallocator.IAReqGroupChange(instances=self.op.instances,
16143                                         target_groups=self.op.target_groups)
16144     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16145       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16146                                      evac_mode=self.op.evac_mode)
16147     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16148       disk_template = self.op.disk_template
16149       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16150                                              memory=self.op.memory,
16151                                              disks=self.op.disks,
16152                                              disk_template=disk_template,
16153                                              os=self.op.os,
16154                                              tags=self.op.tags,
16155                                              nics=self.op.nics,
16156                                              vcpus=self.op.vcpus,
16157                                              spindle_use=self.op.spindle_use,
16158                                              hypervisor=self.op.hypervisor)
16159                for idx in range(self.op.count)]
16160       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16161     else:
16162       raise errors.ProgrammerError("Uncatched mode %s in"
16163                                    " LUTestAllocator.Exec", self.op.mode)
16164
16165     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16166     if self.op.direction == constants.IALLOCATOR_DIR_IN:
16167       result = ial.in_text
16168     else:
16169       ial.Run(self.op.iallocator, validate=False)
16170       result = ial.out_text
16171     return result
16172
16173
16174 class LUNetworkAdd(LogicalUnit):
16175   """Logical unit for creating networks.
16176
16177   """
16178   HPATH = "network-add"
16179   HTYPE = constants.HTYPE_NETWORK
16180   REQ_BGL = False
16181
16182   def BuildHooksNodes(self):
16183     """Build hooks nodes.
16184
16185     """
16186     mn = self.cfg.GetMasterNode()
16187     return ([mn], [mn])
16188
16189   def CheckArguments(self):
16190     if self.op.mac_prefix:
16191       self.op.mac_prefix = \
16192         utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16193
16194   def ExpandNames(self):
16195     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16196
16197     if self.op.conflicts_check:
16198       self.share_locks[locking.LEVEL_NODE] = 1
16199       self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16200       self.needed_locks = {
16201         locking.LEVEL_NODE: locking.ALL_SET,
16202         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16203         }
16204     else:
16205       self.needed_locks = {}
16206
16207     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16208
16209   def CheckPrereq(self):
16210     if self.op.network is None:
16211       raise errors.OpPrereqError("Network must be given",
16212                                  errors.ECODE_INVAL)
16213
16214     try:
16215       existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16216     except errors.OpPrereqError:
16217       pass
16218     else:
16219       raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16220                                  " network (UUID: %s)" %
16221                                  (self.op.network_name, existing_uuid),
16222                                  errors.ECODE_EXISTS)
16223
16224     # Check tag validity
16225     for tag in self.op.tags:
16226       objects.TaggableObject.ValidateTag(tag)
16227
16228   def BuildHooksEnv(self):
16229     """Build hooks env.
16230
16231     """
16232     args = {
16233       "name": self.op.network_name,
16234       "subnet": self.op.network,
16235       "gateway": self.op.gateway,
16236       "network6": self.op.network6,
16237       "gateway6": self.op.gateway6,
16238       "mac_prefix": self.op.mac_prefix,
16239       "network_type": self.op.network_type,
16240       "tags": self.op.tags,
16241       }
16242     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16243
16244   def Exec(self, feedback_fn):
16245     """Add the ip pool to the cluster.
16246
16247     """
16248     nobj = objects.Network(name=self.op.network_name,
16249                            network=self.op.network,
16250                            gateway=self.op.gateway,
16251                            network6=self.op.network6,
16252                            gateway6=self.op.gateway6,
16253                            mac_prefix=self.op.mac_prefix,
16254                            network_type=self.op.network_type,
16255                            uuid=self.network_uuid,
16256                            family=constants.IP4_VERSION)
16257     # Initialize the associated address pool
16258     try:
16259       pool = network.AddressPool.InitializeNetwork(nobj)
16260     except errors.AddressPoolError, e:
16261       raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16262
16263     # Check if we need to reserve the nodes and the cluster master IP
16264     # These may not be allocated to any instances in routed mode, as
16265     # they wouldn't function anyway.
16266     if self.op.conflicts_check:
16267       for node in self.cfg.GetAllNodesInfo().values():
16268         for ip in [node.primary_ip, node.secondary_ip]:
16269           try:
16270             if pool.Contains(ip):
16271               pool.Reserve(ip)
16272               self.LogInfo("Reserved IP address of node '%s' (%s)",
16273                            node.name, ip)
16274           except errors.AddressPoolError:
16275             self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16276                             node.name, ip)
16277
16278       master_ip = self.cfg.GetClusterInfo().master_ip
16279       try:
16280         if pool.Contains(master_ip):
16281           pool.Reserve(master_ip)
16282           self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16283       except errors.AddressPoolError:
16284         self.LogWarning("Cannot reserve cluster master IP address (%s)",
16285                         master_ip)
16286
16287     if self.op.add_reserved_ips:
16288       for ip in self.op.add_reserved_ips:
16289         try:
16290           pool.Reserve(ip, external=True)
16291         except errors.AddressPoolError, e:
16292           raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16293
16294     if self.op.tags:
16295       for tag in self.op.tags:
16296         nobj.AddTag(tag)
16297
16298     self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16299     del self.remove_locks[locking.LEVEL_NETWORK]
16300
16301
16302 class LUNetworkRemove(LogicalUnit):
16303   HPATH = "network-remove"
16304   HTYPE = constants.HTYPE_NETWORK
16305   REQ_BGL = False
16306
16307   def ExpandNames(self):
16308     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16309
16310     self.share_locks[locking.LEVEL_NODEGROUP] = 1
16311     self.needed_locks = {
16312       locking.LEVEL_NETWORK: [self.network_uuid],
16313       locking.LEVEL_NODEGROUP: locking.ALL_SET,
16314       }
16315
16316   def CheckPrereq(self):
16317     """Check prerequisites.
16318
16319     This checks that the given network name exists as a network, that is
16320     empty (i.e., contains no nodes), and that is not the last group of the
16321     cluster.
16322
16323     """
16324     # Verify that the network is not conncted.
16325     node_groups = [group.name
16326                    for group in self.cfg.GetAllNodeGroupsInfo().values()
16327                    if self.network_uuid in group.networks]
16328
16329     if node_groups:
16330       self.LogWarning("Network '%s' is connected to the following"
16331                       " node groups: %s" %
16332                       (self.op.network_name,
16333                        utils.CommaJoin(utils.NiceSort(node_groups))))
16334       raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16335
16336   def BuildHooksEnv(self):
16337     """Build hooks env.
16338
16339     """
16340     return {
16341       "NETWORK_NAME": self.op.network_name,
16342       }
16343
16344   def BuildHooksNodes(self):
16345     """Build hooks nodes.
16346
16347     """
16348     mn = self.cfg.GetMasterNode()
16349     return ([mn], [mn])
16350
16351   def Exec(self, feedback_fn):
16352     """Remove the network.
16353
16354     """
16355     try:
16356       self.cfg.RemoveNetwork(self.network_uuid)
16357     except errors.ConfigurationError:
16358       raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16359                                (self.op.network_name, self.network_uuid))
16360
16361
16362 class LUNetworkSetParams(LogicalUnit):
16363   """Modifies the parameters of a network.
16364
16365   """
16366   HPATH = "network-modify"
16367   HTYPE = constants.HTYPE_NETWORK
16368   REQ_BGL = False
16369
16370   def CheckArguments(self):
16371     if (self.op.gateway and
16372         (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16373       raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16374                                  " at once", errors.ECODE_INVAL)
16375
16376   def ExpandNames(self):
16377     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16378
16379     self.needed_locks = {
16380       locking.LEVEL_NETWORK: [self.network_uuid],
16381       }
16382
16383   def CheckPrereq(self):
16384     """Check prerequisites.
16385
16386     """
16387     self.network = self.cfg.GetNetwork(self.network_uuid)
16388     self.gateway = self.network.gateway
16389     self.network_type = self.network.network_type
16390     self.mac_prefix = self.network.mac_prefix
16391     self.network6 = self.network.network6
16392     self.gateway6 = self.network.gateway6
16393     self.tags = self.network.tags
16394
16395     self.pool = network.AddressPool(self.network)
16396
16397     if self.op.gateway:
16398       if self.op.gateway == constants.VALUE_NONE:
16399         self.gateway = None
16400       else:
16401         self.gateway = self.op.gateway
16402         if self.pool.IsReserved(self.gateway):
16403           raise errors.OpPrereqError("Gateway IP address '%s' is already"
16404                                      " reserved" % self.gateway,
16405                                      errors.ECODE_STATE)
16406
16407     if self.op.network_type:
16408       if self.op.network_type == constants.VALUE_NONE:
16409         self.network_type = None
16410       else:
16411         self.network_type = self.op.network_type
16412
16413     if self.op.mac_prefix:
16414       if self.op.mac_prefix == constants.VALUE_NONE:
16415         self.mac_prefix = None
16416       else:
16417         self.mac_prefix = \
16418           utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16419
16420     if self.op.gateway6:
16421       if self.op.gateway6 == constants.VALUE_NONE:
16422         self.gateway6 = None
16423       else:
16424         self.gateway6 = self.op.gateway6
16425
16426     if self.op.network6:
16427       if self.op.network6 == constants.VALUE_NONE:
16428         self.network6 = None
16429       else:
16430         self.network6 = self.op.network6
16431
16432   def BuildHooksEnv(self):
16433     """Build hooks env.
16434
16435     """
16436     args = {
16437       "name": self.op.network_name,
16438       "subnet": self.network.network,
16439       "gateway": self.gateway,
16440       "network6": self.network6,
16441       "gateway6": self.gateway6,
16442       "mac_prefix": self.mac_prefix,
16443       "network_type": self.network_type,
16444       "tags": self.tags,
16445       }
16446     return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16447
16448   def BuildHooksNodes(self):
16449     """Build hooks nodes.
16450
16451     """
16452     mn = self.cfg.GetMasterNode()
16453     return ([mn], [mn])
16454
16455   def Exec(self, feedback_fn):
16456     """Modifies the network.
16457
16458     """
16459     #TODO: reserve/release via temporary reservation manager
16460     #      extend cfg.ReserveIp/ReleaseIp with the external flag
16461     if self.op.gateway:
16462       if self.gateway == self.network.gateway:
16463         self.LogWarning("Gateway is already %s", self.gateway)
16464       else:
16465         if self.gateway:
16466           self.pool.Reserve(self.gateway, external=True)
16467         if self.network.gateway:
16468           self.pool.Release(self.network.gateway, external=True)
16469         self.network.gateway = self.gateway
16470
16471     if self.op.add_reserved_ips:
16472       for ip in self.op.add_reserved_ips:
16473         try:
16474           if self.pool.IsReserved(ip):
16475             self.LogWarning("IP address %s is already reserved", ip)
16476           else:
16477             self.pool.Reserve(ip, external=True)
16478         except errors.AddressPoolError, err:
16479           self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16480
16481     if self.op.remove_reserved_ips:
16482       for ip in self.op.remove_reserved_ips:
16483         if ip == self.network.gateway:
16484           self.LogWarning("Cannot unreserve Gateway's IP")
16485           continue
16486         try:
16487           if not self.pool.IsReserved(ip):
16488             self.LogWarning("IP address %s is already unreserved", ip)
16489           else:
16490             self.pool.Release(ip, external=True)
16491         except errors.AddressPoolError, err:
16492           self.LogWarning("Cannot release IP address %s: %s", ip, err)
16493
16494     if self.op.mac_prefix:
16495       self.network.mac_prefix = self.mac_prefix
16496
16497     if self.op.network6:
16498       self.network.network6 = self.network6
16499
16500     if self.op.gateway6:
16501       self.network.gateway6 = self.gateway6
16502
16503     if self.op.network_type:
16504       self.network.network_type = self.network_type
16505
16506     self.pool.Validate()
16507
16508     self.cfg.Update(self.network, feedback_fn)
16509
16510
16511 class _NetworkQuery(_QueryBase):
16512   FIELDS = query.NETWORK_FIELDS
16513
16514   def ExpandNames(self, lu):
16515     lu.needed_locks = {}
16516     lu.share_locks = _ShareAll()
16517
16518     self.do_locking = self.use_locking
16519
16520     all_networks = lu.cfg.GetAllNetworksInfo()
16521     name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16522
16523     if self.names:
16524       missing = []
16525       self.wanted = []
16526
16527       for name in self.names:
16528         if name in name_to_uuid:
16529           self.wanted.append(name_to_uuid[name])
16530         else:
16531           missing.append(name)
16532
16533       if missing:
16534         raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16535                                    errors.ECODE_NOENT)
16536     else:
16537       self.wanted = locking.ALL_SET
16538
16539     if self.do_locking:
16540       lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16541       if query.NETQ_INST in self.requested_data:
16542         lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16543       if query.NETQ_GROUP in self.requested_data:
16544         lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16545
16546   def DeclareLocks(self, lu, level):
16547     pass
16548
16549   def _GetQueryData(self, lu):
16550     """Computes the list of networks and their attributes.
16551
16552     """
16553     all_networks = lu.cfg.GetAllNetworksInfo()
16554
16555     network_uuids = self._GetNames(lu, all_networks.keys(),
16556                                    locking.LEVEL_NETWORK)
16557
16558     name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16559
16560     do_instances = query.NETQ_INST in self.requested_data
16561     do_groups = query.NETQ_GROUP in self.requested_data
16562
16563     network_to_instances = None
16564     network_to_groups = None
16565
16566     # For NETQ_GROUP, we need to map network->[groups]
16567     if do_groups:
16568       all_groups = lu.cfg.GetAllNodeGroupsInfo()
16569       network_to_groups = dict((uuid, []) for uuid in network_uuids)
16570       for _, group in all_groups.iteritems():
16571         for net_uuid in network_uuids:
16572           netparams = group.networks.get(net_uuid, None)
16573           if netparams:
16574             info = (group.name, netparams[constants.NIC_MODE],
16575                     netparams[constants.NIC_LINK])
16576
16577             network_to_groups[net_uuid].append(info)
16578
16579     if do_instances:
16580       all_instances = lu.cfg.GetAllInstancesInfo()
16581       network_to_instances = dict((uuid, []) for uuid in network_uuids)
16582       for instance in all_instances.values():
16583         for nic in instance.nics:
16584           if nic.network:
16585             net_uuid = name_to_uuid[nic.network]
16586             if net_uuid in network_uuids:
16587               network_to_instances[net_uuid].append(instance.name)
16588             break
16589
16590     if query.NETQ_STATS in self.requested_data:
16591       stats = \
16592         dict((uuid,
16593               self._GetStats(network.AddressPool(all_networks[uuid])))
16594              for uuid in network_uuids)
16595     else:
16596       stats = None
16597
16598     return query.NetworkQueryData([all_networks[uuid]
16599                                    for uuid in network_uuids],
16600                                    network_to_groups,
16601                                    network_to_instances,
16602                                    stats)
16603
16604   @staticmethod
16605   def _GetStats(pool):
16606     """Returns statistics for a network address pool.
16607
16608     """
16609     return {
16610       "free_count": pool.GetFreeCount(),
16611       "reserved_count": pool.GetReservedCount(),
16612       "map": pool.GetMap(),
16613       "external_reservations":
16614         utils.CommaJoin(pool.GetExternalReservations()),
16615       }
16616
16617
16618 class LUNetworkQuery(NoHooksLU):
16619   """Logical unit for querying networks.
16620
16621   """
16622   REQ_BGL = False
16623
16624   def CheckArguments(self):
16625     self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16626                             self.op.output_fields, self.op.use_locking)
16627
16628   def ExpandNames(self):
16629     self.nq.ExpandNames(self)
16630
16631   def Exec(self, feedback_fn):
16632     return self.nq.OldStyleQuery(self)
16633
16634
16635 class LUNetworkConnect(LogicalUnit):
16636   """Connect a network to a nodegroup
16637
16638   """
16639   HPATH = "network-connect"
16640   HTYPE = constants.HTYPE_NETWORK
16641   REQ_BGL = False
16642
16643   def ExpandNames(self):
16644     self.network_name = self.op.network_name
16645     self.group_name = self.op.group_name
16646     self.network_mode = self.op.network_mode
16647     self.network_link = self.op.network_link
16648
16649     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16650     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16651
16652     self.needed_locks = {
16653       locking.LEVEL_INSTANCE: [],
16654       locking.LEVEL_NODEGROUP: [self.group_uuid],
16655       }
16656     self.share_locks[locking.LEVEL_INSTANCE] = 1
16657
16658     if self.op.conflicts_check:
16659       self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16660       self.share_locks[locking.LEVEL_NETWORK] = 1
16661
16662   def DeclareLocks(self, level):
16663     if level == locking.LEVEL_INSTANCE:
16664       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16665
16666       # Lock instances optimistically, needs verification once group lock has
16667       # been acquired
16668       if self.op.conflicts_check:
16669         self.needed_locks[locking.LEVEL_INSTANCE] = \
16670             self.cfg.GetNodeGroupInstances(self.group_uuid)
16671
16672   def BuildHooksEnv(self):
16673     ret = {
16674       "GROUP_NAME": self.group_name,
16675       "GROUP_NETWORK_MODE": self.network_mode,
16676       "GROUP_NETWORK_LINK": self.network_link,
16677       }
16678     return ret
16679
16680   def BuildHooksNodes(self):
16681     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16682     return (nodes, nodes)
16683
16684   def CheckPrereq(self):
16685     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16686
16687     assert self.group_uuid in owned_groups
16688
16689     self.netparams = {
16690       constants.NIC_MODE: self.network_mode,
16691       constants.NIC_LINK: self.network_link,
16692       }
16693     objects.NIC.CheckParameterSyntax(self.netparams)
16694
16695     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16696     #if self.network_mode == constants.NIC_MODE_BRIDGED:
16697     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16698     self.connected = False
16699     if self.network_uuid in self.group.networks:
16700       self.LogWarning("Network '%s' is already mapped to group '%s'" %
16701                       (self.network_name, self.group.name))
16702       self.connected = True
16703       return
16704
16705     if self.op.conflicts_check:
16706       pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16707
16708       _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16709                             "connect to")
16710
16711   def Exec(self, feedback_fn):
16712     if self.connected:
16713       return
16714
16715     self.group.networks[self.network_uuid] = self.netparams
16716     self.cfg.Update(self.group, feedback_fn)
16717
16718
16719 def _NetworkConflictCheck(lu, check_fn, action):
16720   """Checks for network interface conflicts with a network.
16721
16722   @type lu: L{LogicalUnit}
16723   @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16724     returning boolean
16725   @param check_fn: Function checking for conflict
16726   @type action: string
16727   @param action: Part of error message (see code)
16728   @raise errors.OpPrereqError: If conflicting IP addresses are found.
16729
16730   """
16731   # Check if locked instances are still correct
16732   owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
16733   _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
16734
16735   conflicts = []
16736
16737   for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
16738     instconflicts = [(idx, nic.ip)
16739                      for (idx, nic) in enumerate(instance.nics)
16740                      if check_fn(nic)]
16741
16742     if instconflicts:
16743       conflicts.append((instance.name, instconflicts))
16744
16745   if conflicts:
16746     lu.LogWarning("IP addresses from network '%s', which is about to %s"
16747                   " node group '%s', are in use: %s" %
16748                   (lu.network_name, action, lu.group.name,
16749                    utils.CommaJoin(("%s: %s" %
16750                                     (name, _FmtNetworkConflict(details)))
16751                                    for (name, details) in conflicts)))
16752
16753     raise errors.OpPrereqError("Conflicting IP addresses found; "
16754                                " remove/modify the corresponding network"
16755                                " interfaces", errors.ECODE_STATE)
16756
16757
16758 def _FmtNetworkConflict(details):
16759   """Utility for L{_NetworkConflictCheck}.
16760
16761   """
16762   return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16763                          for (idx, ipaddr) in details)
16764
16765
16766 class LUNetworkDisconnect(LogicalUnit):
16767   """Disconnect a network to a nodegroup
16768
16769   """
16770   HPATH = "network-disconnect"
16771   HTYPE = constants.HTYPE_NETWORK
16772   REQ_BGL = False
16773
16774   def ExpandNames(self):
16775     self.network_name = self.op.network_name
16776     self.group_name = self.op.group_name
16777
16778     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16779     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16780
16781     self.needed_locks = {
16782       locking.LEVEL_INSTANCE: [],
16783       locking.LEVEL_NODEGROUP: [self.group_uuid],
16784       }
16785     self.share_locks[locking.LEVEL_INSTANCE] = 1
16786
16787   def DeclareLocks(self, level):
16788     if level == locking.LEVEL_INSTANCE:
16789       assert not self.needed_locks[locking.LEVEL_INSTANCE]
16790
16791       # Lock instances optimistically, needs verification once group lock has
16792       # been acquired
16793       self.needed_locks[locking.LEVEL_INSTANCE] = \
16794         self.cfg.GetNodeGroupInstances(self.group_uuid)
16795
16796   def BuildHooksEnv(self):
16797     ret = {
16798       "GROUP_NAME": self.group_name,
16799       }
16800     return ret
16801
16802   def BuildHooksNodes(self):
16803     nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16804     return (nodes, nodes)
16805
16806   def CheckPrereq(self):
16807     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16808
16809     assert self.group_uuid in owned_groups
16810
16811     self.group = self.cfg.GetNodeGroup(self.group_uuid)
16812     self.connected = True
16813     if self.network_uuid not in self.group.networks:
16814       self.LogWarning("Network '%s' is not mapped to group '%s'",
16815                       self.network_name, self.group.name)
16816       self.connected = False
16817       return
16818
16819     _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
16820                           "disconnect from")
16821
16822   def Exec(self, feedback_fn):
16823     if not self.connected:
16824       return
16825
16826     del self.group.networks[self.network_uuid]
16827     self.cfg.Update(self.group, feedback_fn)
16828
16829
16830 #: Query type implementations
16831 _QUERY_IMPL = {
16832   constants.QR_CLUSTER: _ClusterQuery,
16833   constants.QR_INSTANCE: _InstanceQuery,
16834   constants.QR_NODE: _NodeQuery,
16835   constants.QR_GROUP: _GroupQuery,
16836   constants.QR_NETWORK: _NetworkQuery,
16837   constants.QR_OS: _OsQuery,
16838   constants.QR_EXTSTORAGE: _ExtStorageQuery,
16839   constants.QR_EXPORT: _ExportQuery,
16840   }
16841
16842 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16843
16844
16845 def _GetQueryImplementation(name):
16846   """Returns the implemtnation for a query type.
16847
16848   @param name: Query type, must be one of L{constants.QR_VIA_OP}
16849
16850   """
16851   try:
16852     return _QUERY_IMPL[name]
16853   except KeyError:
16854     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16855                                errors.ECODE_INVAL)
16856
16857
16858 def _CheckForConflictingIp(lu, ip, node):
16859   """In case of conflicting IP address raise error.
16860
16861   @type ip: string
16862   @param ip: IP address
16863   @type node: string
16864   @param node: node name
16865
16866   """
16867   (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16868   if conf_net is not None:
16869     raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16870                                 (ip, conf_net)),
16871                                errors.ECODE_STATE)
16872
16873   return (None, None)