4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
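# Illustrative sketch (not from the original module): an LU's Exec could hand
# follow-up work to the job queue like this, assuming a hypothetical
# affected_instances list; any keyword arguments become additional entries in
# the opcode result.
#
#   return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=name)]
#                          for name in affected_instances],
#                         other_result="value kept alongside the job IDs")
#
# mcpu._ProcessResult then submits each inner list as one job and records the
# resulting job IDs in the opcode result.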
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left purely as a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same time.
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
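# A minimal sketch of the conventions described above (hypothetical LU,
# example names only): one instance lock is requested up front, node locks
# are declared empty here and computed later in DeclareLocks, and node locks
# are shared rather than exclusive.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["inst1.example.com"],
#       locking.LEVEL_NODE: [],
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE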
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If no nodes are needed, an
323 empty list must be returned (not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
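# Typical use (sketch): an instance-level LU can implement ExpandNames as
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#
# which leaves self.op.instance_name fully expanded and requests the matching
# instance lock.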
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
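# Sketch of the usual ExpandNames/DeclareLocks pairing for this helper
# (hypothetical LU):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)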
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node name
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
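# Example of the merge semantics above (key names and values are made up):
# with use_default=True, VALUE_DEFAULT entries are dropped from the result
# instead of being stored:
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"memory": constants.VALUE_DEFAULT,
#                      "auto_balance": True})
#   -> {"vcpus": 2, "auto_balance": True}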
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
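# Usage sketch (called from inside an LU; pnode/snode are hypothetical
# variables): once an LU knows which nodes it really touches, it can drop the
# rest of its node locks either by naming what to release or what to keep
# (the two parameters are mutually exclusive):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[pnode, snode])
#   _ReleaseLocks(self, locking.LEVEL_NODE)  # release everything at the level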
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node does not support the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
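# Worked example (made-up numbers): with an ipolicy whose min/max entries give
# a disk-size range of [1024, 10240],
#   _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, "1", ipolicy, 512)
# returns a message along the lines of
#   "disk-size/1 value 512 is not in range [1024, 10240]"
# while a value of 2048 returns None (no violation).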
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1234 _compute_fn=_ComputeMinMaxSpec):
1235 """Verifies ipolicy against provided specs.
1238 @param ipolicy: The ipolicy
1240 @param mem_size: The memory size
1241 @type cpu_count: int
1242 @param cpu_count: Used cpu cores
1243 @type disk_count: int
1244 @param disk_count: Number of disks used
1245 @type nic_count: int
1246 @param nic_count: Number of nics used
1247 @type disk_sizes: list of ints
1248 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1249 @type spindle_use: int
1250 @param spindle_use: The number of spindles this instance uses
1251 @param _compute_fn: The compute function (unittest only)
1252 @return: A list of violations, or an empty list if no violations are found
1255 assert disk_count == len(disk_sizes)
1258 (constants.ISPEC_MEM_SIZE, "", mem_size),
1259 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1260 (constants.ISPEC_DISK_COUNT, "", disk_count),
1261 (constants.ISPEC_NIC_COUNT, "", nic_count),
1262 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1263 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1264 for idx, d in enumerate(disk_sizes)]
1267 (_compute_fn(name, qualifier, ipolicy, value)
1268 for (name, qualifier, value) in test_settings))
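# Usage sketch (made-up values): verifying a two-disk instance shape directly
# against an ipolicy; the result is empty when everything is within the
# policy's min/max ranges:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=512,
#                                             cpu_count=2, disk_count=2,
#                                             nic_count=1,
#                                             disk_sizes=[1024, 2048],
#                                             spindle_use=1)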
1271 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1272 _compute_fn=_ComputeIPolicySpecViolation):
1273 """Compute if instance meets the specs of ipolicy.
1276 @param ipolicy: The ipolicy to verify against
1277 @type instance: L{objects.Instance}
1278 @param instance: The instance to verify
1279 @param _compute_fn: The function to verify ipolicy (unittest only)
1280 @see: L{_ComputeIPolicySpecViolation}
1283 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1284 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1285 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1286 disk_count = len(instance.disks)
1287 disk_sizes = [disk.size for disk in instance.disks]
1288 nic_count = len(instance.nics)
1290 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1291 disk_sizes, spindle_use)
1294 def _ComputeIPolicyInstanceSpecViolation(
1295 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1296 """Compute if instance specs meets the specs of ipolicy.
1299 @param ipolicy: The ipolicy to verify against
1300 @type instance_spec: dict
1301 @param instance_spec: The instance spec to verify
1302 @param _compute_fn: The function to verify ipolicy (unittest only)
1303 @see: L{_ComputeIPolicySpecViolation}
1306 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1307 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1308 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1309 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1310 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1311 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1313 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1314 disk_sizes, spindle_use)
1317 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1319 _compute_fn=_ComputeIPolicyInstanceViolation):
1320 """Compute if instance meets the specs of the new target group.
1322 @param ipolicy: The ipolicy to verify
1323 @param instance: The instance object to verify
1324 @param current_group: The current group of the instance
1325 @param target_group: The new group of the instance
1326 @param _compute_fn: The function to verify ipolicy (unittest only)
1327 @see: L{_ComputeIPolicySpecViolation}
1330 if current_group == target_group:
1333 return _compute_fn(ipolicy, instance)
1336 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1337 _compute_fn=_ComputeIPolicyNodeViolation):
1338 """Checks that the target node is correct in terms of instance policy.
1340 @param ipolicy: The ipolicy to verify
1341 @param instance: The instance object to verify
1342 @param node: The new node to relocate
1343 @param ignore: Ignore violations of the ipolicy
1344 @param _compute_fn: The function to verify ipolicy (unittest only)
1345 @see: L{_ComputeIPolicySpecViolation}
1348 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1349 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1352 msg = ("Instance does not meet target node group's (%s) instance"
1353 " policy: %s") % (node.group, utils.CommaJoin(res))
1357 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1360 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1361 """Computes a set of any instances that would violate the new ipolicy.
1363 @param old_ipolicy: The current (still in-place) ipolicy
1364 @param new_ipolicy: The new (to become) ipolicy
1365 @param instances: List of instances to verify
1366 @return: A set of instances which violate the new ipolicy but
1370 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1371 _ComputeViolatingInstances(old_ipolicy, instances))
1374 def _ExpandItemName(fn, name, kind):
1375 """Expand an item name.
1377 @param fn: the function to use for expansion
1378 @param name: requested item name
1379 @param kind: text description ('Node' or 'Instance')
1380 @return: the resolved (full) name
1381 @raise errors.OpPrereqError: if the item is not found
1384 full_name = fn(name)
1385 if full_name is None:
1386 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1391 def _ExpandNodeName(cfg, name):
1392 """Wrapper over L{_ExpandItemName} for nodes."""
1393 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1396 def _ExpandInstanceName(cfg, name):
1397 """Wrapper over L{_ExpandItemName} for instance."""
1398 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
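# Behaviour sketch (example names, called from inside an LU): both wrappers
# return the fully expanded name, or raise OpPrereqError when the
# configuration cannot resolve it:
#
#   _ExpandNodeName(self.cfg, "node1")     # -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "bogus") # raises errors.OpPrereqError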
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1403 """Builds network related env variables for hooks
1405 This builds the hook environment from individual variables.
1408 @param name: the name of the network
1409 @type subnet: string
1410 @param subnet: the ipv4 subnet
1411 @type gateway: string
1412 @param gateway: the ipv4 gateway
1413 @type network6: string
1414 @param network6: the ipv6 subnet
1415 @type gateway6: string
1416 @param gateway6: the ipv6 gateway
1417 @type mac_prefix: string
1418 @param mac_prefix: the mac_prefix
1420 @param tags: the tags of the network
1425 env["NETWORK_NAME"] = name
1427 env["NETWORK_SUBNET"] = subnet
1429 env["NETWORK_GATEWAY"] = gateway
1431 env["NETWORK_SUBNET6"] = network6
1433 env["NETWORK_GATEWAY6"] = gateway6
1435 env["NETWORK_MAC_PREFIX"] = mac_prefix
1437 env["NETWORK_TAGS"] = " ".join(tags)
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443 minmem, maxmem, vcpus, nics, disk_template, disks,
1444 bep, hvp, hypervisor_name, tags):
1445 """Builds instance related env variables for hooks
1447 This builds the hook environment from individual variables.
1450 @param name: the name of the instance
1451 @type primary_node: string
1452 @param primary_node: the name of the instance's primary node
1453 @type secondary_nodes: list
1454 @param secondary_nodes: list of secondary nodes as strings
1455 @type os_type: string
1456 @param os_type: the name of the instance's OS
1457 @type status: string
1458 @param status: the desired status of the instance
1459 @type minmem: string
1460 @param minmem: the minimum memory size of the instance
1461 @type maxmem: string
1462 @param maxmem: the maximum memory size of the instance
1464 @param vcpus: the count of VCPUs the instance has
1466 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467 the NICs the instance has
1468 @type disk_template: string
1469 @param disk_template: the disk template of the instance
1471 @param disks: the list of (size, mode) pairs
1473 @param bep: the backend parameters for the instance
1475 @param hvp: the hypervisor parameters for the instance
1476 @type hypervisor_name: string
1477 @param hypervisor_name: the hypervisor for the instance
1479 @param tags: list of instance tags as strings
1481 @return: the hook environment for this instance
1486 "INSTANCE_NAME": name,
1487 "INSTANCE_PRIMARY": primary_node,
1488 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489 "INSTANCE_OS_TYPE": os_type,
1490 "INSTANCE_STATUS": status,
1491 "INSTANCE_MINMEM": minmem,
1492 "INSTANCE_MAXMEM": maxmem,
1493 # TODO(2.7) remove deprecated "memory" value
1494 "INSTANCE_MEMORY": maxmem,
1495 "INSTANCE_VCPUS": vcpus,
1496 "INSTANCE_DISK_TEMPLATE": disk_template,
1497 "INSTANCE_HYPERVISOR": hypervisor_name,
1500 nic_count = len(nics)
1501 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1504 env["INSTANCE_NIC%d_IP" % idx] = ip
1505 env["INSTANCE_NIC%d_MAC" % idx] = mac
1506 env["INSTANCE_NIC%d_MODE" % idx] = mode
1507 env["INSTANCE_NIC%d_LINK" % idx] = link
1509 nobj = objects.Network.FromDict(netinfo)
1510 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1512 # FIXME: broken network reference: the instance NIC specifies a
1513 # network, but the relevant network entry was not in the config. This
1514 # should be made impossible.
1515 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
1545 def _NICToTuple(lu, nic):
1546 """Build a tupple of nic information.
1548 @type lu: L{LogicalUnit}
1549 @param lu: the logical unit on whose behalf we execute
1550 @type nic: L{objects.NIC}
1551 @param nic: nic to convert to hooks tuple
1554 cluster = lu.cfg.GetClusterInfo()
1555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556 mode = filled_params[constants.NIC_MODE]
1557 link = filled_params[constants.NIC_LINK]
1560 nobj = lu.cfg.GetNetwork(nic.network)
1561 netinfo = objects.Network.ToDict(nobj)
1562 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1565 def _NICListToTuple(lu, nics):
1566 """Build a list of nic information tuples.
1568 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1569 value in LUInstanceQueryData.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nics: list of L{objects.NIC}
1574 @param nics: list of nics to convert to hooks tuples
1579 hooks_nics.append(_NICToTuple(lu, nic))
1583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1584 """Builds instance related env variables for hooks from an object.
1586 @type lu: L{LogicalUnit}
1587 @param lu: the logical unit on whose behalf we execute
1588 @type instance: L{objects.Instance}
1589 @param instance: the instance for which we should build the
1591 @type override: dict
1592 @param override: dictionary with key/values that will override
1595 @return: the hook environment dictionary
1598 cluster = lu.cfg.GetClusterInfo()
1599 bep = cluster.FillBE(instance)
1600 hvp = cluster.FillHV(instance)
1602 "name": instance.name,
1603 "primary_node": instance.primary_node,
1604 "secondary_nodes": instance.secondary_nodes,
1605 "os_type": instance.os,
1606 "status": instance.admin_state,
1607 "maxmem": bep[constants.BE_MAXMEM],
1608 "minmem": bep[constants.BE_MINMEM],
1609 "vcpus": bep[constants.BE_VCPUS],
1610 "nics": _NICListToTuple(lu, instance.nics),
1611 "disk_template": instance.disk_template,
1612 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1615 "hypervisor_name": instance.hypervisor,
1616 "tags": instance.tags,
1619 args.update(override)
1620 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
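# Usage sketch (hypothetical values): selected keys can be overridden, e.g. to
# build the hook environment for a planned memory change before it is applied:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"maxmem": 4096,
#                                                 "minmem": 2048})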
1623 def _AdjustCandidatePool(lu, exceptions):
1624 """Adjust the candidate pool after node operations.
1627 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1629 lu.LogInfo("Promoted nodes to master candidate role: %s",
1630 utils.CommaJoin(node.name for node in mod_list))
1631 for name in mod_list:
1632 lu.context.ReaddNode(name)
1633 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1635 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640 """Decide whether I should promote myself as a master candidate.
1643 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645 # the new node will increase mc_max by one, so:
1646 mc_should = min(mc_should + 1, cp_size)
1647 return mc_now < mc_should
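# Worked example: with candidate_pool_size = 10, mc_now = 7 currently promoted
# candidates and mc_should = 8 reported by GetMasterCandidateStats, adding
# this node gives mc_should = min(8 + 1, 10) = 9; since 7 < 9 the node decides
# to promote itself.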
1650 def _ComputeViolatingInstances(ipolicy, instances):
1651 """Computes a set of instances who violates given ipolicy.
1653 @param ipolicy: The ipolicy to verify
1654 @type instances: list of L{objects.Instance}
1655 @param instances: List of instances to verify
1656 @return: A frozenset of instance names violating the ipolicy
1659 return frozenset([inst.name for inst in instances
1660 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1663 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1664 """Check that the brigdes needed by a list of nics exist.
1667 cluster = lu.cfg.GetClusterInfo()
1668 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1669 brlist = [params[constants.NIC_LINK] for params in paramslist
1670 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1672 result = lu.rpc.call_bridges_exist(target_node, brlist)
1673 result.Raise("Error checking bridges on destination node '%s'" %
1674 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1677 def _CheckInstanceBridgesExist(lu, instance, node=None):
1678 """Check that the brigdes needed by an instance exist.
1682 node = instance.primary_node
1683 _CheckNicsBridgesExist(lu, instance.nics, node)
1686 def _CheckOSVariant(os_obj, name):
1687 """Check whether an OS name conforms to the os variants specification.
1689 @type os_obj: L{objects.OS}
1690 @param os_obj: OS object to check
1692 @param name: OS name passed by the user, to check for validity
1695 variant = objects.OS.GetVariant(name)
1696 if not os_obj.supported_variants:
1698 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1699 " passed)" % (os_obj.name, variant),
1703 raise errors.OpPrereqError("OS name must include a variant",
1706 if variant not in os_obj.supported_variants:
1707 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1710 def _GetNodeInstancesInner(cfg, fn):
1711 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1714 def _GetNodeInstances(cfg, node_name):
1715 """Returns a list of all primary and secondary instances on a node.
1719 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1722 def _GetNodePrimaryInstances(cfg, node_name):
1723 """Returns primary instances on a node.
1726 return _GetNodeInstancesInner(cfg,
1727 lambda inst: node_name == inst.primary_node)
1730 def _GetNodeSecondaryInstances(cfg, node_name):
1731 """Returns secondary instances on a node.
1734 return _GetNodeInstancesInner(cfg,
1735 lambda inst: node_name in inst.secondary_nodes)
1738 def _GetStorageTypeArgs(cfg, storage_type):
1739 """Returns the arguments for a storage type.
1742 # Special case for file storage
1743 if storage_type == constants.ST_FILE:
1744 # storage.FileStorage wants a list of storage directories
1745 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1750 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1753 for dev in instance.disks:
1754 cfg.SetDiskID(dev, node_name)
1756 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1758 result.Raise("Failed to get disk status from node %s" % node_name,
1759 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1761 for idx, bdev_status in enumerate(result.payload):
1762 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1768 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1769 """Check the sanity of iallocator and node arguments and use the
1770 cluster-wide iallocator if appropriate.
1772 Check that at most one of (iallocator, node) is specified. If none is
1773 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1774 then the LU's opcode's iallocator slot is filled with the cluster-wide
1777 @type iallocator_slot: string
1778 @param iallocator_slot: the name of the opcode iallocator slot
1779 @type node_slot: string
1780 @param node_slot: the name of the opcode target node slot
1783 node = getattr(lu.op, node_slot, None)
1784 ialloc = getattr(lu.op, iallocator_slot, None)
1788 if node is not None and ialloc is not None:
1789 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1791 elif ((node is None and ialloc is None) or
1792 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1793 default_iallocator = lu.cfg.GetDefaultIAllocator()
1794 if default_iallocator:
1795 setattr(lu.op, iallocator_slot, default_iallocator)
1797 raise errors.OpPrereqError("No iallocator or node given and no"
1798 " cluster-wide default iallocator found;"
1799 " please specify either an iallocator or a"
1800 " node, or set a cluster-wide default"
1801 " iallocator", errors.ECODE_INVAL)
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805 """Decides on which iallocator to use.
1807 @type cfg: L{config.ConfigWriter}
1808 @param cfg: Cluster configuration object
1809 @type ialloc: string or None
1810 @param ialloc: Iallocator specified in opcode
1812 @return: Iallocator name
1816 # Use default iallocator
1817 ialloc = cfg.GetDefaultIAllocator()
1820 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821 " opcode nor as a cluster-wide default",
1827 def _CheckHostnameSane(lu, name):
1828 """Ensures that a given hostname resolves to a 'sane' name.
1830 The given name is required to be a prefix of the resolved hostname,
1831 to prevent accidental mismatches.
1833 @param lu: the logical unit on behalf of which we're checking
1834 @param name: the name we should resolve and check
1835 @return: the resolved hostname object
1838 hostname = netutils.GetHostname(name=name)
1839 if hostname.name != name:
1840 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1841 if not utils.MatchNameComponent(name, [hostname.name]):
1842 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1843 " same as given hostname '%s'") %
1844 (hostname.name, name), errors.ECODE_INVAL)
1848 class LUClusterPostInit(LogicalUnit):
1849 """Logical unit for running hooks after cluster initialization.
1852 HPATH = "cluster-init"
1853 HTYPE = constants.HTYPE_CLUSTER
1855 def BuildHooksEnv(self):
1860 "OP_TARGET": self.cfg.GetClusterName(),
1863 def BuildHooksNodes(self):
1864 """Build hooks nodes.
1867 return ([], [self.cfg.GetMasterNode()])
1869 def Exec(self, feedback_fn):
1876 class LUClusterDestroy(LogicalUnit):
1877 """Logical unit for destroying the cluster.
1880 HPATH = "cluster-destroy"
1881 HTYPE = constants.HTYPE_CLUSTER
1883 def BuildHooksEnv(self):
1888 "OP_TARGET": self.cfg.GetClusterName(),
1891 def BuildHooksNodes(self):
1892 """Build hooks nodes.
1897 def CheckPrereq(self):
1898 """Check prerequisites.
1900 This checks whether the cluster is empty.
1902 Any errors are signaled by raising errors.OpPrereqError.
1905 master = self.cfg.GetMasterNode()
1907 nodelist = self.cfg.GetNodeList()
1908 if len(nodelist) != 1 or nodelist[0] != master:
1909 raise errors.OpPrereqError("There are still %d node(s) in"
1910 " this cluster." % (len(nodelist) - 1),
1912 instancelist = self.cfg.GetInstanceList()
1914 raise errors.OpPrereqError("There are still %d instance(s) in"
1915 " this cluster." % len(instancelist),
1918 def Exec(self, feedback_fn):
1919 """Destroys the cluster.
1922 master_params = self.cfg.GetMasterNetworkParameters()
1924 # Run post hooks on master node before it's removed
1925 _RunPostHook(self, master_params.name)
1927 ems = self.cfg.GetUseExternalMipScript()
1928 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1931 self.LogWarning("Error disabling the master IP address: %s",
1934 return master_params.name
1937 def _VerifyCertificate(filename):
1938 """Verifies a certificate for L{LUClusterVerifyConfig}.
1940 @type filename: string
1941 @param filename: Path to PEM file
1945 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1946 utils.ReadFile(filename))
1947 except Exception, err: # pylint: disable=W0703
1948 return (LUClusterVerifyConfig.ETYPE_ERROR,
1949 "Failed to load X509 certificate %s: %s" % (filename, err))
1952 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1953 constants.SSL_CERT_EXPIRATION_ERROR)
1956 fnamemsg = "While verifying %s: %s" % (filename, msg)
1961 return (None, fnamemsg)
1962 elif errcode == utils.CERT_WARNING:
1963 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1964 elif errcode == utils.CERT_ERROR:
1965 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1967 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1970 def _GetAllHypervisorParameters(cluster, instances):
1971 """Compute the set of all hypervisor parameters.
1973 @type cluster: L{objects.Cluster}
1974 @param cluster: the cluster object
1975 @param instances: list of L{objects.Instance}
1976 @param instances: additional instances from which to obtain parameters
1977 @rtype: list of (origin, hypervisor, parameters)
1978 @return: a list with all parameters found, indicating the hypervisor they
1979 apply to, and the origin (can be "cluster", "os X", or "instance Y")
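For example, the returned list could contain entries such as
("cluster", "xen-pvm", {...}), ("os debian-etch", "xen-pvm", {...}) and
("instance inst1", "kvm", {...}) (names purely illustrative).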
1984 for hv_name in cluster.enabled_hypervisors:
1985 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987 for os_name, os_hvp in cluster.os_hvp.items():
1988 for hv_name, hv_params in os_hvp.items():
1990 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993 # TODO: collapse identical parameter values into a single one
1994 for instance in instances:
1995 if instance.hvparams:
1996 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997 cluster.FillHV(instance)))
2002 class _VerifyErrors(object):
2003 """Mix-in for cluster/group verify LUs.
2005 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006 self.op and self._feedback_fn to be available.)
2010 ETYPE_FIELD = "code"
2011 ETYPE_ERROR = "ERROR"
2012 ETYPE_WARNING = "WARNING"
2014 def _Error(self, ecode, item, msg, *args, **kwargs):
2015 """Format an error message.
2017 Based on the opcode's error_codes parameter, either format a
2018 parseable error code, or a simpler error string.
2020 This must be called only from Exec and functions called from Exec.
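Illustratively (values are hypothetical), with error_codes enabled the
message follows the "type:code:itemtype:item:message" layout, e.g.
"ERROR:ENODENET:node:node1:missing bridges", while without it a shorter
human-readable string such as "ERROR: node node1: missing bridges" is
emitted.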
2023 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024 itype, etxt, _ = ecode
2025 # If the error code is in the list of ignored errors, demote the error to a warning
2027 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2028 ltype = self.ETYPE_WARNING
2029 # first complete the msg
2032 # then format the whole message
2033 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2040 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041 # and finally report it via the feedback_fn
2042 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2043 # do not mark the operation as failed for WARN cases only
2044 if ltype == self.ETYPE_ERROR:
2047 def _ErrorIf(self, cond, *args, **kwargs):
2048 """Log an error message if the passed condition is True.
2052 or self.op.debug_simulate_errors): # pylint: disable=E1101
2053 self._Error(*args, **kwargs)
2056 class LUClusterVerify(NoHooksLU):
2057 """Submits all jobs necessary to verify the cluster.
2062 def ExpandNames(self):
2063 self.needed_locks = {}
2065 def Exec(self, feedback_fn):
2068 if self.op.group_name:
2069 groups = [self.op.group_name]
2070 depends_fn = lambda: None
2072 groups = self.cfg.GetNodeGroupList()
2074 # Verify global configuration
2076 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2079 # Always depend on global verification
2080 depends_fn = lambda: [(-len(jobs), [])]
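# depends_fn is re-evaluated for every group job queued below; since
# len(jobs) grows as those jobs are appended, the relative dependency
# keeps pointing back at the first job of this submission, i.e. the
# config-verification job above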
2083 [opcodes.OpClusterVerifyGroup(group_name=group,
2084 ignore_errors=self.op.ignore_errors,
2085 depends=depends_fn())]
2086 for group in groups)
2088 # Fix up all parameters
2089 for op in itertools.chain(*jobs): # pylint: disable=W0142
2090 op.debug_simulate_errors = self.op.debug_simulate_errors
2091 op.verbose = self.op.verbose
2092 op.error_codes = self.op.error_codes
2094 op.skip_checks = self.op.skip_checks
2095 except AttributeError:
2096 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2098 return ResultWithJobs(jobs)
2101 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2102 """Verifies the cluster config.
2107 def _VerifyHVP(self, hvp_data):
2108 """Verifies locally the syntax of the hypervisor parameters.
2111 for item, hv_name, hv_params in hvp_data:
2112 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2115 hv_class = hypervisor.GetHypervisorClass(hv_name)
2116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2117 hv_class.CheckParameterSyntax(hv_params)
2118 except errors.GenericError, err:
2119 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2121 def ExpandNames(self):
2122 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2123 self.share_locks = _ShareAll()
2125 def CheckPrereq(self):
2126 """Check prerequisites.
2129 # Retrieve all information
2130 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2131 self.all_node_info = self.cfg.GetAllNodesInfo()
2132 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2134 def Exec(self, feedback_fn):
2135 """Verify integrity of cluster, performing various test on nodes.
2139 self._feedback_fn = feedback_fn
2141 feedback_fn("* Verifying cluster config")
2143 for msg in self.cfg.VerifyConfig():
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2146 feedback_fn("* Verifying cluster certificate files")
2148 for cert_filename in pathutils.ALL_CERT_FILES:
2149 (errcode, msg) = _VerifyCertificate(cert_filename)
2150 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2152 feedback_fn("* Verifying hypervisor parameters")
2154 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2155 self.all_inst_info.values()))
2157 feedback_fn("* Verifying all nodes belong to an existing group")
2159 # We do this verification here because, should this bogus circumstance
2160 # occur, it would never be caught by VerifyGroup, which only acts on
2161 # nodes/instances reachable from existing node groups.
2163 dangling_nodes = set(node.name for node in self.all_node_info.values()
2164 if node.group not in self.all_group_info)
2166 dangling_instances = {}
2167 no_node_instances = []
2169 for inst in self.all_inst_info.values():
2170 if inst.primary_node in dangling_nodes:
2171 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2172 elif inst.primary_node not in self.all_node_info:
2173 no_node_instances.append(inst.name)
2178 utils.CommaJoin(dangling_instances.get(node.name,
2180 for node in dangling_nodes]
2182 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2184 "the following nodes (and their instances) belong to a non"
2185 " existing group: %s", utils.CommaJoin(pretty_dangling))
2187 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2189 "the following instances have a non-existing primary-node:"
2190 " %s", utils.CommaJoin(no_node_instances))
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196 """Verifies the status of a node group.
2199 HPATH = "cluster-verify"
2200 HTYPE = constants.HTYPE_CLUSTER
2203 _HOOKS_INDENT_RE = re.compile("^", re.M)
2205 class NodeImage(object):
2206 """A class representing the logical and physical status of a node.
2209 @ivar name: the node name to which this object refers
2210 @ivar volumes: a structure as returned from
2211 L{ganeti.backend.GetVolumeList} (runtime)
2212 @ivar instances: a list of running instances (runtime)
2213 @ivar pinst: list of configured primary instances (config)
2214 @ivar sinst: list of configured secondary instances (config)
2215 @ivar sbp: dictionary of {primary-node: list of instances} for all
2216 instances for which this node is secondary (config)
2217 @ivar mfree: free memory, as reported by hypervisor (runtime)
2218 @ivar dfree: free disk, as reported by the node (runtime)
2219 @ivar offline: the offline status (config)
2220 @type rpc_fail: boolean
2221 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2222 not whether the individual keys were correct) (runtime)
2223 @type lvm_fail: boolean
2224 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225 @type hyp_fail: boolean
2226 @ivar hyp_fail: whether the RPC call didn't return the instance list
2227 @type ghost: boolean
2228 @ivar ghost: whether this is a known node or not (config)
2229 @type os_fail: boolean
2230 @ivar os_fail: whether the RPC call didn't return valid OS data
2232 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233 @type vm_capable: boolean
2234 @ivar vm_capable: whether the node can host instances
2236 @ivar pv_min: size in MiB of the smallest PVs
2238 @ivar pv_max: size in MiB of the biggest PVs
2241 def __init__(self, offline=False, name=None, vm_capable=True):
2250 self.offline = offline
2251 self.vm_capable = vm_capable
2252 self.rpc_fail = False
2253 self.lvm_fail = False
2254 self.hyp_fail = False
2256 self.os_fail = False
2261 def ExpandNames(self):
2262 # This raises errors.OpPrereqError on its own:
2263 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265 # Get instances in node group; this is unsafe and needs verification later
2267 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269 self.needed_locks = {
2270 locking.LEVEL_INSTANCE: inst_names,
2271 locking.LEVEL_NODEGROUP: [self.group_uuid],
2272 locking.LEVEL_NODE: [],
2274 # This opcode is run by watcher every five minutes and acquires all nodes
2275 # for a group. It doesn't run for a long time, so it's better to acquire
2276 # the node allocation lock as well.
2277 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2280 self.share_locks = _ShareAll()
2282 def DeclareLocks(self, level):
2283 if level == locking.LEVEL_NODE:
2284 # Get members of node group; this is unsafe and needs verification later
2285 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287 all_inst_info = self.cfg.GetAllInstancesInfo()
2289 # In Exec(), we warn about mirrored instances that have primary and
2290 # secondary living in separate node groups. To fully verify that
2291 # volumes for these instances are healthy, we will need to do an
2292 extra call to their secondaries. We ensure here those nodes will be locked.
2294 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295 # Important: access only the instances whose lock is owned
2296 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297 nodes.update(all_inst_info[inst].secondary_nodes)
2299 self.needed_locks[locking.LEVEL_NODE] = nodes
2301 def CheckPrereq(self):
2302 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305 group_nodes = set(self.group_info.members)
2307 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312 unlocked_instances = \
2313 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2316 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317 utils.CommaJoin(unlocked_nodes),
2320 if unlocked_instances:
2321 raise errors.OpPrereqError("Missing lock for instances: %s" %
2322 utils.CommaJoin(unlocked_instances),
2325 self.all_node_info = self.cfg.GetAllNodesInfo()
2326 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328 self.my_node_names = utils.NiceSort(group_nodes)
2329 self.my_inst_names = utils.NiceSort(group_instances)
2331 self.my_node_info = dict((name, self.all_node_info[name])
2332 for name in self.my_node_names)
2334 self.my_inst_info = dict((name, self.all_inst_info[name])
2335 for name in self.my_inst_names)
2337 # We detect here the nodes that will need the extra RPC calls for verifying
2338 # split LV volumes; they should be locked.
2339 extra_lv_nodes = set()
2341 for inst in self.my_inst_info.values():
2342 if inst.disk_template in constants.DTS_INT_MIRROR:
2343 for nname in inst.all_nodes:
2344 if self.all_node_info[nname].group != self.group_uuid:
2345 extra_lv_nodes.add(nname)
2347 unlocked_lv_nodes = \
2348 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350 if unlocked_lv_nodes:
2351 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352 utils.CommaJoin(unlocked_lv_nodes),
2354 self.extra_lv_nodes = list(extra_lv_nodes)
2356 def _VerifyNode(self, ninfo, nresult):
2357 """Perform some basic validation on data returned from a node.
2359 - check the result data structure is well formed and has all the
2361 - check ganeti version
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the results from the node
2367 @return: whether overall this call was successful (and we can expect
2368 reasonable values in the response)
2372 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374 # main result, nresult should be a non-empty dict
2375 test = not nresult or not isinstance(nresult, dict)
2376 _ErrorIf(test, constants.CV_ENODERPC, node,
2377 "unable to verify node: no data returned")
2381 # compares ganeti version
2382 local_version = constants.PROTOCOL_VERSION
2383 remote_version = nresult.get("version", None)
2384 test = not (remote_version and
2385 isinstance(remote_version, (list, tuple)) and
2386 len(remote_version) == 2)
2387 _ErrorIf(test, constants.CV_ENODERPC, node,
2388 "connection to node returned invalid data")
2392 test = local_version != remote_version[0]
2393 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394 "incompatible protocol versions: master %s,"
2395 " node %s", local_version, remote_version[0])
2399 # node seems compatible, we can actually try to look into its results
2401 # full package version
2402 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403 constants.CV_ENODEVERSION, node,
2404 "software version mismatch: master %s, node %s",
2405 constants.RELEASE_VERSION, remote_version[1],
2406 code=self.ETYPE_WARNING)
2408 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409 if ninfo.vm_capable and isinstance(hyp_result, dict):
2410 for hv_name, hv_result in hyp_result.iteritems():
2411 test = hv_result is not None
2412 _ErrorIf(test, constants.CV_ENODEHV, node,
2413 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416 if ninfo.vm_capable and isinstance(hvp_result, list):
2417 for item, hv_name, hv_result in hvp_result:
2418 _ErrorIf(True, constants.CV_ENODEHV, node,
2419 "hypervisor %s parameter verify failure (source %s): %s",
2420 hv_name, item, hv_result)
2422 test = nresult.get(constants.NV_NODESETUP,
2423 ["Missing NODESETUP results"])
2424 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2429 def _VerifyNodeTime(self, ninfo, nresult,
2430 nvinfo_starttime, nvinfo_endtime):
2431 """Check the node time.
2433 @type ninfo: L{objects.Node}
2434 @param ninfo: the node to check
2435 @param nresult: the remote results for the node
2436 @param nvinfo_starttime: the start time of the RPC call
2437 @param nvinfo_endtime: the end time of the RPC call
2441 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443 ntime = nresult.get(constants.NV_TIME, None)
2445 ntime_merged = utils.MergeTime(ntime)
2446 except (ValueError, TypeError):
2447 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2450 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2457 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458 "Node time diverges by at least %s from master node time",
2461 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462 """Check the node LVM results and update info for cross-node checks.
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param vg_name: the configured VG name
2468 @type nimg: L{NodeImage}
2469 @param nimg: node image
2476 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478 # checks vg existence and size > 20G
2479 vglist = nresult.get(constants.NV_VGLIST, None)
2481 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2483 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484 constants.MIN_VG_SIZE)
2485 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2488 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490 self._Error(constants.CV_ENODELVM, node, em)
2491 if pvminmax is not None:
2492 (nimg.pv_min, nimg.pv_max) = pvminmax
2494 def _VerifyGroupLVM(self, node_image, vg_name):
2495 """Check cross-node consistency in LVM.
2497 @type node_image: dict
2498 @param node_image: info about nodes, mapping from node to names to
2499 L{NodeImage} objects
2500 @param vg_name: the configured VG name
2506 # Only exclusive storage needs this kind of check
2507 if not self._exclusive_storage:
2510 # exclusive_storage wants all PVs to have the same size (approximately),
2511 # if the smallest and the biggest ones are okay, everything is fine.
2512 # pv_min is None iff pv_max is None
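# only the group-wide smallest and biggest PV sizes (and the nodes they
# live on) are compared; LvmExclusiveTestBadPvSizes decides whether the
# spread between them is acceptable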
2513 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2516 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520 "PV sizes differ too much in the group; smallest (%s MB) is"
2521 " on %s, biggest (%s MB) is on %s",
2522 pvmin, minnode, pvmax, maxnode)
2524 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525 """Check the node bridges.
2527 @type ninfo: L{objects.Node}
2528 @param ninfo: the node to check
2529 @param nresult: the remote results for the node
2530 @param bridges: the expected list of bridges
2537 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539 missing = nresult.get(constants.NV_BRIDGES, None)
2540 test = not isinstance(missing, list)
2541 _ErrorIf(test, constants.CV_ENODENET, node,
2542 "did not return valid bridge information")
2544 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2547 def _VerifyNodeUserScripts(self, ninfo, nresult):
2548 """Check the results of user scripts presence and executability on the node
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2557 test = constants.NV_USERSCRIPTS not in nresult
2558 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559 "did not return user scripts information")
2561 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564 "user scripts not present or not executable: %s" %
2565 utils.CommaJoin(sorted(broken_scripts)))
2567 def _VerifyNodeNetwork(self, ninfo, nresult):
2568 """Check the node network connectivity results.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2576 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578 test = constants.NV_NODELIST not in nresult
2579 _ErrorIf(test, constants.CV_ENODESSH, node,
2580 "node hasn't returned node ssh connectivity data")
2582 if nresult[constants.NV_NODELIST]:
2583 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584 _ErrorIf(True, constants.CV_ENODESSH, node,
2585 "ssh communication with node '%s': %s", a_node, a_msg)
2587 test = constants.NV_NODENETTEST not in nresult
2588 _ErrorIf(test, constants.CV_ENODENET, node,
2589 "node hasn't returned node tcp connectivity data")
2591 if nresult[constants.NV_NODENETTEST]:
2592 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594 _ErrorIf(True, constants.CV_ENODENET, node,
2595 "tcp communication with node '%s': %s",
2596 anode, nresult[constants.NV_NODENETTEST][anode])
2598 test = constants.NV_MASTERIP not in nresult
2599 _ErrorIf(test, constants.CV_ENODENET, node,
2600 "node hasn't returned node master IP reachability data")
2602 if not nresult[constants.NV_MASTERIP]:
2603 if node == self.master_node:
2604 msg = "the master node cannot reach the master IP (not configured?)"
2606 msg = "cannot reach the master IP"
2607 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2609 def _VerifyInstance(self, instance, inst_config, node_image,
2611 """Verify an instance.
2613 This function checks to see if the required block devices are
2614 available on the instance's node, and that the nodes are in the correct
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 pnode = inst_config.primary_node
2620 pnode_img = node_image[pnode]
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623 node_vol_should = {}
2624 inst_config.MapLVsByNode(node_vol_should)
2626 cluster = self.cfg.GetClusterInfo()
2627 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631 code=self.ETYPE_WARNING)
2633 for node in node_vol_should:
2634 n_img = node_image[node]
2635 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636 # ignore missing volumes on offline or broken nodes
2638 for volume in node_vol_should[node]:
2639 test = volume not in n_img.volumes
2640 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641 "volume %s missing on node %s", volume, node)
2643 if inst_config.admin_state == constants.ADMINST_UP:
2644 test = instance not in pnode_img.instances and not pnode_img.offline
2645 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646 "instance not running on its primary node %s",
2648 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649 "instance is marked as running and lives on offline node %s",
2652 diskdata = [(nname, success, status, idx)
2653 for (nname, disks) in diskstatus.items()
2654 for idx, (success, status) in enumerate(disks)]
2656 for nname, success, bdev_status, idx in diskdata:
2657 # the 'ghost node' construction in Exec() ensures that we have a
2659 snode = node_image[nname]
2660 bad_snode = snode.ghost or snode.offline
2661 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662 not success and not bad_snode,
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "couldn't retrieve status for disk/%s on %s: %s",
2665 idx, nname, bdev_status)
2666 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "disk/%s on %s is faulty", idx, nname)
2671 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673 " primary node failed", instance)
2675 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676 constants.CV_EINSTANCELAYOUT,
2677 instance, "instance has multiple secondary nodes: %s",
2678 utils.CommaJoin(inst_config.secondary_nodes),
2679 code=self.ETYPE_WARNING)
2681 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682 # Disk template not compatible with exclusive_storage: no instance
2683 # node should have the flag set
2684 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685 inst_config.all_nodes)
2686 es_nodes = [n for (n, es) in es_flags.items()
2688 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689 "instance has template %s, which is not supported on nodes"
2690 " that have exclusive storage set: %s",
2691 inst_config.disk_template, utils.CommaJoin(es_nodes))
2693 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695 instance_groups = {}
2697 for node in instance_nodes:
2698 instance_groups.setdefault(self.all_node_info[node].group,
2702 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703 # Sort so that we always list the primary node first.
2704 for group, nodes in sorted(instance_groups.items(),
2705 key=lambda (_, nodes): pnode in nodes,
2708 self._ErrorIf(len(instance_groups) > 1,
2709 constants.CV_EINSTANCESPLITGROUPS,
2710 instance, "instance has primary and secondary nodes in"
2711 " different groups: %s", utils.CommaJoin(pretty_list),
2712 code=self.ETYPE_WARNING)
2714 inst_nodes_offline = []
2715 for snode in inst_config.secondary_nodes:
2716 s_img = node_image[snode]
2717 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718 snode, "instance %s, connection to secondary node failed",
2722 inst_nodes_offline.append(snode)
2724 # warn that the instance lives on offline nodes
2725 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726 "instance has offline secondary node(s) %s",
2727 utils.CommaJoin(inst_nodes_offline))
2728 # ... or ghost/non-vm_capable nodes
2729 for node in inst_config.all_nodes:
2730 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731 instance, "instance lives on ghost node %s", node)
2732 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733 instance, "instance lives on non-vm_capable node %s", node)
2735 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736 """Verify if there are any unknown volumes in the cluster.
2738 The .os, .swap and backup volumes are ignored. All other volumes are
2739 reported as unknown.
2741 @type reserved: L{ganeti.utils.FieldSet}
2742 @param reserved: a FieldSet of reserved volume names
2745 for node, n_img in node_image.items():
2746 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747 self.all_node_info[node].group != self.group_uuid):
2748 # skip non-healthy nodes
2750 for volume in n_img.volumes:
2751 test = ((node not in node_vol_should or
2752 volume not in node_vol_should[node]) and
2753 not reserved.Matches(volume))
2754 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755 "volume %s is unknown", volume)
2757 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758 """Verify N+1 Memory Resilience.
2760 Check that if one single node dies we can still start all the
2761 instances it was primary for.
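Informally: for every node acting as a secondary, sum the minimum memory of
the auto-balanced instances that could fail over to it from a given primary,
and warn if that sum exceeds the node's currently free memory.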
2764 cluster_info = self.cfg.GetClusterInfo()
2765 for node, n_img in node_image.items():
2766 # This code checks that every node which is now listed as
2767 # secondary has enough memory to host all instances it is
2768 # supposed to should a single other node in the cluster fail.
2769 # FIXME: not ready for failover to an arbitrary node
2770 # FIXME: does not support file-backed instances
2771 # WARNING: we currently take into account down instances as well
2772 # as up ones, considering that even if they're down someone
2773 # might want to start them even in the event of a node failure.
2774 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775 # we're skipping nodes marked offline and nodes in other groups from
2776 # the N+1 warning, since most likely we don't have good memory
2777 # information from them; we already list instances living on such
2778 # nodes, and that's enough warning
2780 #TODO(dynmem): also consider ballooning out other instances
2781 for prinode, instances in n_img.sbp.items():
2783 for instance in instances:
2784 bep = cluster_info.FillBE(instance_cfg[instance])
2785 if bep[constants.BE_AUTO_BALANCE]:
2786 needed_mem += bep[constants.BE_MINMEM]
2787 test = n_img.mfree < needed_mem
2788 self._ErrorIf(test, constants.CV_ENODEN1, node,
2789 "not enough memory to accomodate instance failovers"
2790 " should node %s fail (%dMiB needed, %dMiB available)",
2791 prinode, needed_mem, n_img.mfree)
2794 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795 (files_all, files_opt, files_mc, files_vm)):
2796 """Verifies file checksums collected from all nodes.
2798 @param errorif: Callback for reporting errors
2799 @param nodeinfo: List of L{objects.Node} objects
2800 @param master_node: Name of master node
2801 @param all_nvinfo: RPC results
2804 # Define functions determining which nodes to consider for a file
2807 (files_mc, lambda node: (node.master_candidate or
2808 node.name == master_node)),
2809 (files_vm, lambda node: node.vm_capable),
2812 # Build mapping from filename to list of nodes which should have the file
2814 for (files, fn) in files2nodefn:
2816 filenodes = nodeinfo
2818 filenodes = filter(fn, nodeinfo)
2819 nodefiles.update((filename,
2820 frozenset(map(operator.attrgetter("name"), filenodes)))
2821 for filename in files)
2823 assert set(nodefiles) == (files_all | files_mc | files_vm)
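# fileinfo maps each filename to a dict of {checksum: set of node names
# reporting that checksum}, filled in from the per-node RPC payloads below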
2825 fileinfo = dict((filename, {}) for filename in nodefiles)
2826 ignore_nodes = set()
2828 for node in nodeinfo:
2830 ignore_nodes.add(node.name)
2833 nresult = all_nvinfo[node.name]
2835 if nresult.fail_msg or not nresult.payload:
2838 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2839 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840 for (key, value) in fingerprints.items())
2843 test = not (node_files and isinstance(node_files, dict))
2844 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845 "Node did not return file checksum data")
2847 ignore_nodes.add(node.name)
2850 # Build per-checksum mapping from filename to nodes having it
2851 for (filename, checksum) in node_files.items():
2852 assert filename in nodefiles
2853 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855 for (filename, checksums) in fileinfo.items():
2856 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858 # Nodes having the file
2859 with_file = frozenset(node_name
2860 for nodes in fileinfo[filename].values()
2861 for node_name in nodes) - ignore_nodes
2863 expected_nodes = nodefiles[filename] - ignore_nodes
2865 # Nodes missing file
2866 missing_file = expected_nodes - with_file
2868 if filename in files_opt:
2870 errorif(missing_file and missing_file != expected_nodes,
2871 constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is optional, but it must exist on all or no"
2873 " nodes (not found on %s)",
2874 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is missing from node(s) %s", filename,
2878 utils.CommaJoin(utils.NiceSort(missing_file)))
2880 # Warn if a node has a file it shouldn't
2881 unexpected = with_file - expected_nodes
2883 constants.CV_ECLUSTERFILECHECK, None,
2884 "File %s should not exist on node(s) %s",
2885 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887 # See if there are multiple versions of the file
2888 test = len(checksums) > 1
2890 variants = ["variant %s on %s" %
2891 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892 for (idx, (checksum, nodes)) in
2893 enumerate(sorted(checksums.items()))]
2897 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898 "File %s found with %s different checksums (%s)",
2899 filename, len(checksums), "; ".join(variants))
2901 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903 """Verifies and the node DRBD status.
2905 @type ninfo: L{objects.Node}
2906 @param ninfo: the node to check
2907 @param nresult: the remote results for the node
2908 @param instanceinfo: the dict of instances
2909 @param drbd_helper: the configured DRBD usermode helper
2910 @param drbd_map: the DRBD map as returned by
2911 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2915 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2918 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919 test = (helper_result is None)
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "no drbd usermode helper returned")
2923 status, payload = helper_result
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "drbd usermode helper check unsuccessful: %s", payload)
2927 test = status and (payload != drbd_helper)
2928 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929 "wrong drbd usermode helper: %s", payload)
2931 # compute the DRBD minors
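# node_drbd maps each minor number to (instance name, whether that instance
# is expected to be running according to the configuration)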
2933 for minor, instance in drbd_map[node].items():
2934 test = instance not in instanceinfo
2935 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936 "ghost instance '%s' in temporary DRBD map", instance)
2937 # ghost instance should not be running, but otherwise we
2938 # don't give double warnings (both ghost instance and
2939 # unallocated minor in use)
2941 node_drbd[minor] = (instance, False)
2943 instance = instanceinfo[instance]
2944 node_drbd[minor] = (instance.name,
2945 instance.admin_state == constants.ADMINST_UP)
2947 # and now check them
2948 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949 test = not isinstance(used_minors, (tuple, list))
2950 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951 "cannot parse drbd status file: %s", str(used_minors))
2953 # we cannot check drbd status
2956 for minor, (iname, must_exist) in node_drbd.items():
2957 test = minor not in used_minors and must_exist
2958 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959 "drbd minor %d of instance %s is not active", minor, iname)
2960 for minor in used_minors:
2961 test = minor not in node_drbd
2962 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963 "unallocated drbd minor %d is in use", minor)
2965 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966 """Builds the node OS structures.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 remote_os = nresult.get(constants.NV_OSLIST, None)
2978 test = (not isinstance(remote_os, list) or
2979 not compat.all(isinstance(v, list) and len(v) == 7
2980 for v in remote_os))
2982 _ErrorIf(test, constants.CV_ENODEOS, node,
2983 "node hasn't returned valid OS data")
2992 for (name, os_path, status, diagnose,
2993 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995 if name not in os_dict:
2998 # parameters is a list of lists instead of list of tuples due to
2999 # JSON lacking a real tuple type, fix it:
3000 parameters = [tuple(v) for v in parameters]
3001 os_dict[name].append((os_path, status, diagnose,
3002 set(variants), set(parameters), set(api_ver)))
3004 nimg.oslist = os_dict
3006 def _VerifyNodeOS(self, ninfo, nimg, base):
3007 """Verifies the node OS list.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nimg: the node image object
3012 @param base: the 'template' node we match against (e.g. from the master)
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021 for os_name, os_data in nimg.oslist.items():
3022 assert os_data, "Empty OS status for OS %s?!" % os_name
3023 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027 "OS '%s' has multiple entries (first one shadows the rest): %s",
3028 os_name, utils.CommaJoin([v[0] for v in os_data]))
3029 # comparisons with the 'base' image
3030 test = os_name not in base.oslist
3031 _ErrorIf(test, constants.CV_ENODEOS, node,
3032 "Extra OS %s not present on reference node (%s)",
3036 assert base.oslist[os_name], "Base node has empty OS status?"
3037 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039 # base OS is invalid, skipping
3041 for kind, a, b in [("API version", f_api, b_api),
3042 ("variants list", f_var, b_var),
3043 ("parameters", beautify_params(f_param),
3044 beautify_params(b_param))]:
3045 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047 kind, os_name, base.name,
3048 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050 # check any missing OSes
3051 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052 _ErrorIf(missing, constants.CV_ENODEOS, node,
3053 "OSes present on reference node %s but missing on this node: %s",
3054 base.name, utils.CommaJoin(missing))
3056 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059 @type ninfo: L{objects.Node}
3060 @param ninfo: the node to check
3061 @param nresult: the remote results for the node
3062 @type is_master: bool
3063 @param is_master: Whether node is the master node
3069 (constants.ENABLE_FILE_STORAGE or
3070 constants.ENABLE_SHARED_FILE_STORAGE)):
3072 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074 # This should never happen
3075 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076 "Node did not return forbidden file storage paths")
3078 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Found forbidden file storage paths: %s",
3080 utils.CommaJoin(fspaths))
3082 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083 constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Node should not have returned forbidden file storage"
3087 def _VerifyOob(self, ninfo, nresult):
3088 """Verifies out of band functionality of a node.
3090 @type ninfo: L{objects.Node}
3091 @param ninfo: the node to check
3092 @param nresult: the remote results for the node
3096 # We just have to verify the paths on master and/or master candidates
3097 # as the oob helper is invoked on the master
3098 if ((ninfo.master_candidate or ninfo.master_capable) and
3099 constants.NV_OOB_PATHS in nresult):
3100 for path_result in nresult[constants.NV_OOB_PATHS]:
3101 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3103 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104 """Verifies and updates the node volume data.
3106 This function will update a L{NodeImage}'s internal structures
3107 with data from the remote call.
3109 @type ninfo: L{objects.Node}
3110 @param ninfo: the node to check
3111 @param nresult: the remote results for the node
3112 @param nimg: the node image object
3113 @param vg_name: the configured VG name
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3119 nimg.lvm_fail = True
3120 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3123 elif isinstance(lvdata, basestring):
3124 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125 utils.SafeEncode(lvdata))
3126 elif not isinstance(lvdata, dict):
3127 _ErrorIf(True, constants.CV_ENODELVM, node,
3128 "rpc call to node failed (lvlist)")
3130 nimg.volumes = lvdata
3131 nimg.lvm_fail = False
3133 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134 """Verifies and updates the node instance list.
3136 If the listing was successful, then updates this node's instance
3137 list. Otherwise, it marks the RPC call as failed for the instance
3140 @type ninfo: L{objects.Node}
3141 @param ninfo: the node to check
3142 @param nresult: the remote results for the node
3143 @param nimg: the node image object
3146 idata = nresult.get(constants.NV_INSTANCELIST, None)
3147 test = not isinstance(idata, list)
3148 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149 "rpc call to node failed (instancelist): %s",
3150 utils.SafeEncode(str(idata)))
3152 nimg.hyp_fail = True
3154 nimg.instances = idata
3156 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3157 """Verifies and computes a node information map
3159 @type ninfo: L{objects.Node}
3160 @param ninfo: the node to check
3161 @param nresult: the remote results for the node
3162 @param nimg: the node image object
3163 @param vg_name: the configured VG name
3167 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3169 # try to read free memory (from the hypervisor)
3170 hv_info = nresult.get(constants.NV_HVINFO, None)
3171 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3172 _ErrorIf(test, constants.CV_ENODEHV, node,
3173 "rpc call to node failed (hvinfo)")
3176 nimg.mfree = int(hv_info["memory_free"])
3177 except (ValueError, TypeError):
3178 _ErrorIf(True, constants.CV_ENODERPC, node,
3179 "node returned invalid nodeinfo, check hypervisor")
3181 # FIXME: devise a free space model for file based instances as well
3182 if vg_name is not None:
3183 test = (constants.NV_VGLIST not in nresult or
3184 vg_name not in nresult[constants.NV_VGLIST])
3185 _ErrorIf(test, constants.CV_ENODELVM, node,
3186 "node didn't return data for the volume group '%s'"
3187 " - it is either missing or broken", vg_name)
3190 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3191 except (ValueError, TypeError):
3192 _ErrorIf(True, constants.CV_ENODERPC, node,
3193 "node returned invalid LVM info, check LVM status")
3195 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3196 """Gets per-disk status information for all instances.
3198 @type nodelist: list of strings
3199 @param nodelist: Node names
3200 @type node_image: dict of (name, L{objects.Node})
3201 @param node_image: Node objects
3202 @type instanceinfo: dict of (name, L{objects.Instance})
3203 @param instanceinfo: Instance objects
3204 @rtype: {instance: {node: [(success, payload)]}}
3205 @return: a dictionary of per-instance dictionaries with nodes as
3206 keys and disk information as values; the disk information is a
3207 list of tuples (success, payload)
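A (hypothetical) result could look like
{"inst1": {"node1": [(True, status0), (False, "disk degraded")]}}.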
3210 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3213 node_disks_devonly = {}
3214 diskless_instances = set()
3215 diskless = constants.DT_DISKLESS
3217 for nname in nodelist:
3218 node_instances = list(itertools.chain(node_image[nname].pinst,
3219 node_image[nname].sinst))
3220 diskless_instances.update(inst for inst in node_instances
3221 if instanceinfo[inst].disk_template == diskless)
3222 disks = [(inst, disk)
3223 for inst in node_instances
3224 for disk in instanceinfo[inst].disks]
3227 # No need to collect data
3230 node_disks[nname] = disks
3232 # _AnnotateDiskParams already makes copies of the disks
3234 for (inst, dev) in disks:
3235 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3236 self.cfg.SetDiskID(anno_disk, nname)
3237 devonly.append(anno_disk)
3239 node_disks_devonly[nname] = devonly
3241 assert len(node_disks) == len(node_disks_devonly)
3243 # Collect data from all nodes with disks
3244 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3247 assert len(result) == len(node_disks)
3251 for (nname, nres) in result.items():
3252 disks = node_disks[nname]
3255 # No data from this node
3256 data = len(disks) * [(False, "node offline")]
3259 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3260 "while getting disk information: %s", msg)
3262 # No data from this node
3263 data = len(disks) * [(False, msg)]
3266 for idx, i in enumerate(nres.payload):
3267 if isinstance(i, (tuple, list)) and len(i) == 2:
3270 logging.warning("Invalid result from node %s, entry %d: %s",
3272 data.append((False, "Invalid result from the remote node"))
3274 for ((inst, _), status) in zip(disks, data):
3275 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3277 # Add empty entries for diskless instances.
3278 for inst in diskless_instances:
3279 assert inst not in instdisk
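# Sanity check: every instance must have one (success, payload) pair per
# configured disk, and no more nodes recorded than it actually uses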
3282 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3283 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3284 compat.all(isinstance(s, (tuple, list)) and
3285 len(s) == 2 for s in statuses)
3286 for inst, nnames in instdisk.items()
3287 for nname, statuses in nnames.items())
3289 instdisk_keys = set(instdisk)
3290 instanceinfo_keys = set(instanceinfo)
3291 assert instdisk_keys == instanceinfo_keys, \
3292 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3293 (instdisk_keys, instanceinfo_keys))
3298 def _SshNodeSelector(group_uuid, all_nodes):
3299 """Create endless iterators for all potential SSH check hosts.
3302 nodes = [node for node in all_nodes
3303 if (node.group != group_uuid and
3305 keyfunc = operator.attrgetter("group")
3307 return map(itertools.cycle,
3308 [sorted(map(operator.attrgetter("name"), names))
3309 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3313 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3314 """Choose which nodes should talk to which other nodes.
3316 We will make nodes contact all nodes in their group, and one node from each of the other groups.
3319 @warning: This algorithm has a known issue if one node group is much
3320 smaller than others (e.g. just one node). In such a case all other
3321 nodes will talk to the single node.
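As an illustrative sketch (hypothetical names): when verifying a group
containing n1 and n2 while another group contains m1, both n1 and n2 end up
with SSH check targets that include their group peers plus one node (m1)
picked from the other group by the cycling selectors above.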
3324 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3325 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3327 return (online_nodes,
3328 dict((name, sorted([i.next() for i in sel]))
3329 for name in online_nodes))
3331 def BuildHooksEnv(self):
3334 Cluster-Verify hooks just ran in the post phase and their failure makes
3335 the output be logged in the verify output and the verification to fail.
3339 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3342 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3343 for node in self.my_node_info.values())
3347 def BuildHooksNodes(self):
3348 """Build hooks nodes.
3351 return ([], self.my_node_names)
3353 def Exec(self, feedback_fn):
3354 """Verify integrity of the node group, performing various test on nodes.
3357 # This method has too many local variables. pylint: disable=R0914
3358 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3360 if not self.my_node_names:
3362 feedback_fn("* Empty node group, skipping verification")
3366 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3367 verbose = self.op.verbose
3368 self._feedback_fn = feedback_fn
3370 vg_name = self.cfg.GetVGName()
3371 drbd_helper = self.cfg.GetDRBDHelper()
3372 cluster = self.cfg.GetClusterInfo()
3373 hypervisors = cluster.enabled_hypervisors
3374 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3376 i_non_redundant = [] # Non redundant instances
3377 i_non_a_balanced = [] # Non auto-balanced instances
3378 i_offline = 0 # Count of offline instances
3379 n_offline = 0 # Count of offline nodes
3380 n_drained = 0 # Count of nodes being drained
3381 node_vol_should = {}
3383 # FIXME: verify OS list
3386 filemap = _ComputeAncillaryFiles(cluster, False)
3388 # do local checksums
3389 master_node = self.master_node = self.cfg.GetMasterNode()
3390 master_ip = self.cfg.GetMasterIP()
3392 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3395 if self.cfg.GetUseExternalMipScript():
3396 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
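# Each NV_* key below selects one check for the node_verify RPC to run on
# the target nodes; the values carry the per-check input (file list,
# hypervisors, node lists, etc.)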
3398 node_verify_param = {
3399 constants.NV_FILELIST:
3400 map(vcluster.MakeVirtualPath,
3401 utils.UniqueSequence(filename
3402 for files in filemap
3403 for filename in files)),
3404 constants.NV_NODELIST:
3405 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3406 self.all_node_info.values()),
3407 constants.NV_HYPERVISOR: hypervisors,
3408 constants.NV_HVPARAMS:
3409 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3410 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3411 for node in node_data_list
3412 if not node.offline],
3413 constants.NV_INSTANCELIST: hypervisors,
3414 constants.NV_VERSION: None,
3415 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3416 constants.NV_NODESETUP: None,
3417 constants.NV_TIME: None,
3418 constants.NV_MASTERIP: (master_node, master_ip),
3419 constants.NV_OSLIST: None,
3420 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3421 constants.NV_USERSCRIPTS: user_scripts,
3424 if vg_name is not None:
3425 node_verify_param[constants.NV_VGLIST] = None
3426 node_verify_param[constants.NV_LVLIST] = vg_name
3427 node_verify_param[constants.NV_PVLIST] = [vg_name]
3430 node_verify_param[constants.NV_DRBDLIST] = None
3431 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3433 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3434 # Load file storage paths only from master node
3435 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3438 # FIXME: this needs to be changed per node-group, not cluster-wide
3440 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3441 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(default_nicpp[constants.NIC_LINK])
3443 for instance in self.my_inst_info.values():
3444 for nic in instance.nics:
3445 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3446 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(full_nic[constants.NIC_LINK])
3450 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3452 # Build our expected cluster state
3453 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3455 vm_capable=node.vm_capable))
3456 for node in node_data_list)
3460 for node in self.all_node_info.values():
3461 path = _SupportsOob(self.cfg, node)
3462 if path and path not in oob_paths:
3463 oob_paths.append(path)
3466 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3468 for instance in self.my_inst_names:
3469 inst_config = self.my_inst_info[instance]
3470 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3473 for nname in inst_config.all_nodes:
3474 if nname not in node_image:
3475 gnode = self.NodeImage(name=nname)
3476 gnode.ghost = (nname not in self.all_node_info)
3477 node_image[nname] = gnode
3479 inst_config.MapLVsByNode(node_vol_should)
3481 pnode = inst_config.primary_node
3482 node_image[pnode].pinst.append(instance)
3484 for snode in inst_config.secondary_nodes:
3485 nimg = node_image[snode]
3486 nimg.sinst.append(instance)
3487 if pnode not in nimg.sbp:
3488 nimg.sbp[pnode] = []
3489 nimg.sbp[pnode].append(instance)
3491 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3492 # The value of exclusive_storage should be the same across the group, so if
3493 # it's True for at least one node, we act as if it were set for all the nodes
3494 self._exclusive_storage = compat.any(es_flags.values())
3495 if self._exclusive_storage:
3496 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3498 # At this point, we have the in-memory data structures complete,
3499 # except for the runtime information, which we'll gather next
3501 # Due to the way our RPC system works, exact response times cannot be
3502 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3503 # time before and after executing the request, we can at least have a time window.
3505 nvinfo_starttime = time.time()
3506 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3508 self.cfg.GetClusterName())
3509 nvinfo_endtime = time.time()
3511 if self.extra_lv_nodes and vg_name is not None:
3513 self.rpc.call_node_verify(self.extra_lv_nodes,
3514 {constants.NV_LVLIST: vg_name},
3515 self.cfg.GetClusterName())
3517 extra_lv_nvinfo = {}
3519 all_drbd_map = self.cfg.ComputeDRBDMap()
3521 feedback_fn("* Gathering disk information (%s nodes)" %
3522 len(self.my_node_names))
3523 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3526 feedback_fn("* Verifying configuration file consistency")
3528 # If not all nodes are being checked, we need to make sure the master node
3529 # and a non-checked vm_capable node are in the list.
3530 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3532 vf_nvinfo = all_nvinfo.copy()
3533 vf_node_info = list(self.my_node_info.values())
3534 additional_nodes = []
3535 if master_node not in self.my_node_info:
3536 additional_nodes.append(master_node)
3537 vf_node_info.append(self.all_node_info[master_node])
3538 # Add the first vm_capable node we find which is not included,
3539 # excluding the master node (which we already have)
3540 for node in absent_nodes:
3541 nodeinfo = self.all_node_info[node]
3542 if (nodeinfo.vm_capable and not nodeinfo.offline and
3543 node != master_node):
3544 additional_nodes.append(node)
3545 vf_node_info.append(self.all_node_info[node])
3547 key = constants.NV_FILELIST
3548 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3549 {key: node_verify_param[key]},
3550 self.cfg.GetClusterName()))
3552 vf_nvinfo = all_nvinfo
3553 vf_node_info = self.my_node_info.values()
3555 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3557 feedback_fn("* Verifying node status")
3561 for node_i in node_data_list:
3563 nimg = node_image[node]
3567 feedback_fn("* Skipping offline node %s" % (node,))
3571 if node == master_node:
3573 elif node_i.master_candidate:
3574 ntype = "master candidate"
3575 elif node_i.drained:
3581 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3583 msg = all_nvinfo[node].fail_msg
3584 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3587 nimg.rpc_fail = True
3590 nresult = all_nvinfo[node].payload
3592 nimg.call_ok = self._VerifyNode(node_i, nresult)
3593 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3594 self._VerifyNodeNetwork(node_i, nresult)
3595 self._VerifyNodeUserScripts(node_i, nresult)
3596 self._VerifyOob(node_i, nresult)
3597 self._VerifyFileStoragePaths(node_i, nresult,
3598 node == master_node)
3601 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3602 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3605 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3606 self._UpdateNodeInstances(node_i, nresult, nimg)
3607 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3608 self._UpdateNodeOS(node_i, nresult, nimg)
3610 if not nimg.os_fail:
3611 if refos_img is None:
3613 self._VerifyNodeOS(node_i, nimg, refos_img)
3614 self._VerifyNodeBridges(node_i, nresult, bridges)
3616 # Check whether all running instances are primary for the node. (This
3617 # can no longer be done from _VerifyInstance below, since some of the
3618 # wrong instances could be from other node groups.)
3619 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3621 for inst in non_primary_inst:
3622 test = inst in self.all_inst_info
3623 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3624 "instance should not run on node %s", node_i.name)
3625 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3626 "node is running unknown instance %s", inst)
3628 self._VerifyGroupLVM(node_image, vg_name)
3630 for node, result in extra_lv_nvinfo.items():
3631 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3632 node_image[node], vg_name)
3634 feedback_fn("* Verifying instance status")
3635 for instance in self.my_inst_names:
3637 feedback_fn("* Verifying instance %s" % instance)
3638 inst_config = self.my_inst_info[instance]
3639 self._VerifyInstance(instance, inst_config, node_image,
3642 # If the instance is non-redundant we cannot survive losing its primary
3643 # node, so we are not N+1 compliant.
3644 if inst_config.disk_template not in constants.DTS_MIRRORED:
3645 i_non_redundant.append(instance)
3647 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3648 i_non_a_balanced.append(instance)
3650 feedback_fn("* Verifying orphan volumes")
3651 reserved = utils.FieldSet(*cluster.reserved_lvs)
3653 # We will get spurious "unknown volume" warnings if any node of this group
3654 # is secondary for an instance whose primary is in another group. To avoid
3655 # them, we find these instances and add their volumes to node_vol_should.
3656 for inst in self.all_inst_info.values():
3657 for secondary in inst.secondary_nodes:
3658 if (secondary in self.my_node_info
3659 and inst.name not in self.my_inst_info):
3660 inst.MapLVsByNode(node_vol_should)
3663 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3665 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3666 feedback_fn("* Verifying N+1 Memory redundancy")
3667 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3669 feedback_fn("* Other Notes")
3671 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3672 % len(i_non_redundant))
3674 if i_non_a_balanced:
3675 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3676 % len(i_non_a_balanced))
3679 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3682 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3685 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3689 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3690 """Analyze the post-hooks' result
3692 This method analyses the hook result, handles it, and sends some
3693 nicely-formatted feedback back to the user.
3695 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3696 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3697 @param hooks_results: the results of the multi-node hooks rpc call
3698 @param feedback_fn: function used to send feedback back to the caller
3699 @param lu_result: previous Exec result
3700 @return: the new Exec result, based on the previous result
3704 # We only really run POST phase hooks, only for non-empty groups,
3705 # and are only interested in their results
3706 if not self.my_node_names:
3709 elif phase == constants.HOOKS_PHASE_POST:
3710 # Used to change hooks' output to proper indentation
3711 feedback_fn("* Hooks Results")
3712 assert hooks_results, "invalid result from hooks"
3714 for node_name in hooks_results:
3715 res = hooks_results[node_name]
3717 test = msg and not res.offline
3718 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3719 "Communication failure in hooks execution: %s", msg)
3720 if res.offline or msg:
3721 # No need to investigate payload if node is offline or gave
3724 for script, hkr, output in res.payload:
3725 test = hkr == constants.HKR_FAIL
3726 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3727 "Script %s failed, output:", script)
3729 output = self._HOOKS_INDENT_RE.sub(" ", output)
3730 feedback_fn("%s" % output)
3736 class LUClusterVerifyDisks(NoHooksLU):
3737 """Verifies the cluster disks status.
3742 def ExpandNames(self):
3743 self.share_locks = _ShareAll()
3744 self.needed_locks = {
3745 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3748 def Exec(self, feedback_fn):
3749 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3751 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3752 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3753 for group in group_names])
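# For illustration, with two hypothetical node groups named "default" and
# "storage", the value built above would be roughly:
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])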
3756 class LUGroupVerifyDisks(NoHooksLU):
3757 """Verifies the status of all disks in a node group.
3762 def ExpandNames(self):
3763 # Raises errors.OpPrereqError on its own if group can't be found
3764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3766 self.share_locks = _ShareAll()
3767 self.needed_locks = {
3768 locking.LEVEL_INSTANCE: [],
3769 locking.LEVEL_NODEGROUP: [],
3770 locking.LEVEL_NODE: [],
3772 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3773 # starts one instance of this opcode for every group, which means all
3774 # nodes will be locked for a short amount of time, so it's better to
3775 # acquire the node allocation lock as well.
3776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3779 def DeclareLocks(self, level):
3780 if level == locking.LEVEL_INSTANCE:
3781 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3783 # Lock instances optimistically, needs verification once node and group
3784 # locks have been acquired
3785 self.needed_locks[locking.LEVEL_INSTANCE] = \
3786 self.cfg.GetNodeGroupInstances(self.group_uuid)
3788 elif level == locking.LEVEL_NODEGROUP:
3789 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3791 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3792 set([self.group_uuid] +
3793 # Lock all groups used by instances optimistically; this requires
3794 # going via the node before it's locked, requiring verification
3797 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3798 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3800 elif level == locking.LEVEL_NODE:
3801 # This will only lock the nodes in the group to be verified which contain actual instances
3803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3804 self._LockInstancesNodes()
3806 # Lock all nodes in group to be verified
3807 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3808 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3809 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3811 def CheckPrereq(self):
3812 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3813 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3814 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3816 assert self.group_uuid in owned_groups
3818 # Check if locked instances are still correct
3819 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3821 # Get instance information
3822 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3824 # Check if node groups for locked instances are still correct
3825 _CheckInstancesNodeGroups(self.cfg, self.instances,
3826 owned_groups, owned_nodes, self.group_uuid)
3828 def Exec(self, feedback_fn):
3829 """Verify integrity of cluster disks.
3831 @rtype: tuple of three items
3832 @return: a tuple of (dict of node-to-node_error, list of instances
3833 which need activate-disks, dict of instance: (node, volume) for
3838 res_instances = set()
3841 nv_dict = _MapInstanceDisksToNodes(
3842 [inst for inst in self.instances.values()
3843 if inst.admin_state == constants.ADMINST_UP])
3846 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3847 set(self.cfg.GetVmCapableNodeList()))
3849 node_lvs = self.rpc.call_lv_list(nodes, [])
3851 for (node, node_res) in node_lvs.items():
3852 if node_res.offline:
3855 msg = node_res.fail_msg
3857 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3858 res_nodes[node] = msg
3861 for lv_name, (_, _, lv_online) in node_res.payload.items():
3862 inst = nv_dict.pop((node, lv_name), None)
3863 if not (lv_online or inst is None):
3864 res_instances.add(inst)
3866 # any leftover items in nv_dict are missing LVs, let's arrange the data
3868 for key, inst in nv_dict.iteritems():
3869 res_missing.setdefault(inst, []).append(list(key))
3871 return (res_nodes, list(res_instances), res_missing)
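# Sketch of the result shape (hypothetical names): a possible return value is
#   ({"node2.example.com": "Error enumerating LVs: ..."},       # node errors
#    ["instance3.example.com"],                                 # need activate-disks
#    {"instance4.example.com": [["node1.example.com", "xenvg/lv0"]]})  # missing LVs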
3874 class LUClusterRepairDiskSizes(NoHooksLU):
3875 """Verifies the cluster disks sizes.
3880 def ExpandNames(self):
3881 if self.op.instances:
3882 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3883 # Not getting the node allocation lock as only a specific set of
3884 # instances (and their nodes) is going to be acquired
3885 self.needed_locks = {
3886 locking.LEVEL_NODE_RES: [],
3887 locking.LEVEL_INSTANCE: self.wanted_names,
3889 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3891 self.wanted_names = None
3892 self.needed_locks = {
3893 locking.LEVEL_NODE_RES: locking.ALL_SET,
3894 locking.LEVEL_INSTANCE: locking.ALL_SET,
3896 # This opcode acquires the node locks for all instances
3897 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3900 self.share_locks = {
3901 locking.LEVEL_NODE_RES: 1,
3902 locking.LEVEL_INSTANCE: 0,
3903 locking.LEVEL_NODE_ALLOC: 1,
3906 def DeclareLocks(self, level):
3907 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3908 self._LockInstancesNodes(primary_only=True, level=level)
3910 def CheckPrereq(self):
3911 """Check prerequisites.
3913 This only checks the optional instance list against the existing names.
3916 if self.wanted_names is None:
3917 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3919 self.wanted_instances = \
3920 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3922 def _EnsureChildSizes(self, disk):
3923 """Ensure children of the disk have the needed disk size.
3925 This is valid mainly for DRBD8 and fixes an issue where the
3926 children have a smaller disk size than their parent.
3928 @param disk: an L{ganeti.objects.Disk} object
3931 if disk.dev_type == constants.LD_DRBD8:
3932 assert disk.children, "Empty children for DRBD8?"
3933 fchild = disk.children[0]
3934 mismatch = fchild.size < disk.size
3936 self.LogInfo("Child disk has size %d, parent %d, fixing",
3937 fchild.size, disk.size)
3938 fchild.size = disk.size
3940 # and we recurse on this child only, not on the metadev
3941 return self._EnsureChildSizes(fchild) or mismatch
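# For example (hypothetical sizes): a DRBD8 disk recorded at 10240 MiB whose
# data child reports only 10112 MiB gets the child grown to 10240 MiB here,
# and True is returned so the caller knows the configuration must be updated.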
3945 def Exec(self, feedback_fn):
3946 """Verify the size of cluster disks.
3949 # TODO: check child disks too
3950 # TODO: check differences in size between primary/secondary nodes
3952 for instance in self.wanted_instances:
3953 pnode = instance.primary_node
3954 if pnode not in per_node_disks:
3955 per_node_disks[pnode] = []
3956 for idx, disk in enumerate(instance.disks):
3957 per_node_disks[pnode].append((instance, idx, disk))
3959 assert not (frozenset(per_node_disks.keys()) -
3960 self.owned_locks(locking.LEVEL_NODE_RES)), \
3961 "Not owning correct locks"
3962 assert not self.owned_locks(locking.LEVEL_NODE)
3965 for node, dskl in per_node_disks.items():
3966 newl = [v[2].Copy() for v in dskl]
3968 self.cfg.SetDiskID(dsk, node)
3969 result = self.rpc.call_blockdev_getsize(node, newl)
3971 self.LogWarning("Failure in blockdev_getsize call to node"
3972 " %s, ignoring", node)
3974 if len(result.payload) != len(dskl):
3975 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3976 " result.payload=%s", node, len(dskl), result.payload)
3977 self.LogWarning("Invalid result from node %s, ignoring node results",
3980 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3982 self.LogWarning("Disk %d of instance %s did not return size"
3983 " information, ignoring", idx, instance.name)
3985 if not isinstance(size, (int, long)):
3986 self.LogWarning("Disk %d of instance %s did not return valid"
3987 " size information, ignoring", idx, instance.name)
3990 if size != disk.size:
3991 self.LogInfo("Disk %d of instance %s has mismatched size,"
3992 " correcting: recorded %d, actual %d", idx,
3993 instance.name, disk.size, size)
3995 self.cfg.Update(instance, feedback_fn)
3996 changed.append((instance.name, idx, size))
3997 if self._EnsureChildSizes(disk):
3998 self.cfg.Update(instance, feedback_fn)
3999 changed.append((instance.name, idx, disk.size))
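# The "changed" list accumulates (instance_name, disk_index, new_size) tuples,
# e.g. (hypothetical) [("instance1.example.com", 0, 10240)] when disk 0 of
# that instance had its recorded size corrected to 10240 MiB.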
4003 class LUClusterRename(LogicalUnit):
4004 """Rename the cluster.
4007 HPATH = "cluster-rename"
4008 HTYPE = constants.HTYPE_CLUSTER
4010 def BuildHooksEnv(self):
4015 "OP_TARGET": self.cfg.GetClusterName(),
4016 "NEW_NAME": self.op.name,
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4025 def CheckPrereq(self):
4026 """Verify that the passed name is a valid one.
4029 hostname = netutils.GetHostname(name=self.op.name,
4030 family=self.cfg.GetPrimaryIPFamily())
4032 new_name = hostname.name
4033 self.ip = new_ip = hostname.ip
4034 old_name = self.cfg.GetClusterName()
4035 old_ip = self.cfg.GetMasterIP()
4036 if new_name == old_name and new_ip == old_ip:
4037 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4038 " cluster has changed",
4040 if new_ip != old_ip:
4041 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4042 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4043 " reachable on the network" %
4044 new_ip, errors.ECODE_NOTUNIQUE)
4046 self.op.name = new_name
4048 def Exec(self, feedback_fn):
4049 """Rename the cluster.
4052 clustername = self.op.name
4055 # shutdown the master IP
4056 master_params = self.cfg.GetMasterNetworkParameters()
4057 ems = self.cfg.GetUseExternalMipScript()
4058 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4060 result.Raise("Could not disable the master role")
4063 cluster = self.cfg.GetClusterInfo()
4064 cluster.cluster_name = clustername
4065 cluster.master_ip = new_ip
4066 self.cfg.Update(cluster, feedback_fn)
4068 # update the known hosts file
4069 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4070 node_list = self.cfg.GetOnlineNodeList()
4072 node_list.remove(master_params.name)
4075 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4077 master_params.ip = new_ip
4078 result = self.rpc.call_node_activate_master_ip(master_params.name,
4080 msg = result.fail_msg
4082 self.LogWarning("Could not re-enable the master role on"
4083 " the master, please restart manually: %s", msg)
4088 def _ValidateNetmask(cfg, netmask):
4089 """Checks if a netmask is valid.
4091 @type cfg: L{config.ConfigWriter}
4092 @param cfg: The cluster configuration
4094 @param netmask: the netmask to be verified
4095 @raise errors.OpPrereqError: if the validation fails
4098 ip_family = cfg.GetPrimaryIPFamily()
4100 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4101 except errors.ProgrammerError:
4102 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4103 ip_family, errors.ECODE_INVAL)
4104 if not ipcls.ValidateNetmask(netmask):
4105 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4106 (netmask), errors.ECODE_INVAL)
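# Usage sketch (assuming an IPv4 cluster and a ConfigWriter instance "cfg"):
#   _ValidateNetmask(cfg, 24)   # a valid CIDR prefix length, returns None
#   _ValidateNetmask(cfg, 33)   # out of range for IPv4, raises OpPrereqError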
4109 class LUClusterSetParams(LogicalUnit):
4110 """Change the parameters of the cluster.
4113 HPATH = "cluster-modify"
4114 HTYPE = constants.HTYPE_CLUSTER
4117 def CheckArguments(self):
4121 if self.op.uid_pool:
4122 uidpool.CheckUidPool(self.op.uid_pool)
4124 if self.op.add_uids:
4125 uidpool.CheckUidPool(self.op.add_uids)
4127 if self.op.remove_uids:
4128 uidpool.CheckUidPool(self.op.remove_uids)
4130 if self.op.master_netmask is not None:
4131 _ValidateNetmask(self.cfg, self.op.master_netmask)
4133 if self.op.diskparams:
4134 for dt_params in self.op.diskparams.values():
4135 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4137 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4138 except errors.OpPrereqError, err:
4139 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4142 def ExpandNames(self):
4143 # FIXME: in the future maybe other cluster params won't require checking on
4144 # all nodes to be modified.
4145 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4146 # resource locks the right thing, shouldn't it be the BGL instead?
4147 self.needed_locks = {
4148 locking.LEVEL_NODE: locking.ALL_SET,
4149 locking.LEVEL_INSTANCE: locking.ALL_SET,
4150 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4151 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4153 self.share_locks = _ShareAll()
4155 def BuildHooksEnv(self):
4160 "OP_TARGET": self.cfg.GetClusterName(),
4161 "NEW_VG_NAME": self.op.vg_name,
4164 def BuildHooksNodes(self):
4165 """Build hooks nodes.
4168 mn = self.cfg.GetMasterNode()
4171 def CheckPrereq(self):
4172 """Check prerequisites.
4174 This checks whether the given params don't conflict and
4175 if the given volume group is valid.
4178 if self.op.vg_name is not None and not self.op.vg_name:
4179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181 " instances exist", errors.ECODE_INVAL)
4183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185 raise errors.OpPrereqError("Cannot disable drbd helper while"
4186 " drbd-based instances exist",
4189 node_list = self.owned_locks(locking.LEVEL_NODE)
4191 # if vg_name not None, checks given volume group on all nodes
4193 vglist = self.rpc.call_vg_list(node_list)
4194 for node in node_list:
4195 msg = vglist[node].fail_msg
4197 # ignoring down node
4198 self.LogWarning("Error while gathering data on node %s"
4199 " (ignoring node): %s", node, msg)
4201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4203 constants.MIN_VG_SIZE)
4205 raise errors.OpPrereqError("Error on node '%s': %s" %
4206 (node, vgstatus), errors.ECODE_ENVIRON)
4208 if self.op.drbd_helper:
4209 # checks given drbd helper on all nodes
4210 helpers = self.rpc.call_drbd_helper(node_list)
4211 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4213 self.LogInfo("Not checking drbd helper on offline node %s", node)
4215 msg = helpers[node].fail_msg
4217 raise errors.OpPrereqError("Error checking drbd helper on node"
4218 " '%s': %s" % (node, msg),
4219 errors.ECODE_ENVIRON)
4220 node_helper = helpers[node].payload
4221 if node_helper != self.op.drbd_helper:
4222 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223 (node, node_helper), errors.ECODE_ENVIRON)
4225 self.cluster = cluster = self.cfg.GetClusterInfo()
4226 # validate params changes
4227 if self.op.beparams:
4228 objects.UpgradeBeParams(self.op.beparams)
4229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4232 if self.op.ndparams:
4233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4236 # TODO: we need a more general way to handle resetting
4237 # cluster-level parameters to default values
4238 if self.new_ndparams["oob_program"] == "":
4239 self.new_ndparams["oob_program"] = \
4240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4242 if self.op.hv_state:
4243 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244 self.cluster.hv_state_static)
4245 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246 for hv, values in new_hv_state.items())
4248 if self.op.disk_state:
4249 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250 self.cluster.disk_state_static)
4251 self.new_disk_state = \
4252 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253 for name, values in svalues.items()))
4254 for storage, svalues in new_disk_state.items())
4257 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4260 all_instances = self.cfg.GetAllInstancesInfo().values()
4262 for group in self.cfg.GetAllNodeGroupsInfo().values():
4263 instances = frozenset([inst for inst in all_instances
4264 if compat.any(node in group.members
4265 for node in inst.all_nodes)])
4266 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268 new = _ComputeNewInstanceViolations(ipol,
4269 new_ipolicy, instances)
4271 violations.update(new)
4274 self.LogWarning("After the ipolicy change the following instances"
4275 " violate them: %s",
4276 utils.CommaJoin(utils.NiceSort(violations)))
4278 if self.op.nicparams:
4279 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4284 # check all instances for consistency
4285 for instance in self.cfg.GetAllInstancesInfo().values():
4286 for nic_idx, nic in enumerate(instance.nics):
4287 params_copy = copy.deepcopy(nic.nicparams)
4288 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4290 # check parameter syntax
4292 objects.NIC.CheckParameterSyntax(params_filled)
4293 except errors.ConfigurationError, err:
4294 nic_errors.append("Instance %s, nic/%d: %s" %
4295 (instance.name, nic_idx, err))
4297 # if we're moving instances to routed, check that they have an ip
4298 target_mode = params_filled[constants.NIC_MODE]
4299 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301 " address" % (instance.name, nic_idx))
4303 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304 "\n".join(nic_errors), errors.ECODE_INVAL)
4306 # hypervisor list/parameters
4307 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308 if self.op.hvparams:
4309 for hv_name, hv_dict in self.op.hvparams.items():
4310 if hv_name not in self.new_hvparams:
4311 self.new_hvparams[hv_name] = hv_dict
4313 self.new_hvparams[hv_name].update(hv_dict)
4315 # disk template parameters
4316 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317 if self.op.diskparams:
4318 for dt_name, dt_params in self.op.diskparams.items():
4319 if dt_name not in self.new_diskparams:
4320 self.new_diskparams[dt_name] = dt_params
4322 self.new_diskparams[dt_name].update(dt_params)
4324 # os hypervisor parameters
4325 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4327 for os_name, hvs in self.op.os_hvp.items():
4328 if os_name not in self.new_os_hvp:
4329 self.new_os_hvp[os_name] = hvs
4331 for hv_name, hv_dict in hvs.items():
4333 # Delete if it exists
4334 self.new_os_hvp[os_name].pop(hv_name, None)
4335 elif hv_name not in self.new_os_hvp[os_name]:
4336 self.new_os_hvp[os_name][hv_name] = hv_dict
4338 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4341 self.new_osp = objects.FillDict(cluster.osparams, {})
4342 if self.op.osparams:
4343 for os_name, osp in self.op.osparams.items():
4344 if os_name not in self.new_osp:
4345 self.new_osp[os_name] = {}
4347 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4350 if not self.new_osp[os_name]:
4351 # we removed all parameters
4352 del self.new_osp[os_name]
4354 # check the parameter validity (remote check)
4355 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356 os_name, self.new_osp[os_name])
4358 # changes to the hypervisor list
4359 if self.op.enabled_hypervisors is not None:
4360 self.hv_list = self.op.enabled_hypervisors
4361 for hv in self.hv_list:
4362 # if the hypervisor doesn't already exist in the cluster
4363 # hvparams, we initialize it to empty, and then (in both
4364 # cases) we make sure to fill the defaults, as we might not
4365 # have a complete defaults list if the hypervisor wasn't
4367 if hv not in new_hvp:
4369 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4372 self.hv_list = cluster.enabled_hypervisors
4374 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375 # either the enabled list has changed, or the parameters have, validate
4376 for hv_name, hv_params in self.new_hvparams.items():
4377 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378 (self.op.enabled_hypervisors and
4379 hv_name in self.op.enabled_hypervisors)):
4380 # either this is a new hypervisor, or its parameters have changed
4381 hv_class = hypervisor.GetHypervisorClass(hv_name)
4382 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383 hv_class.CheckParameterSyntax(hv_params)
4384 _CheckHVParams(self, node_list, hv_name, hv_params)
4387 # no need to check any newly-enabled hypervisors, since the
4388 # defaults have already been checked in the above code-block
4389 for os_name, os_hvp in self.new_os_hvp.items():
4390 for hv_name, hv_params in os_hvp.items():
4391 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392 # we need to fill in the new os_hvp on top of the actual hv_p
4393 cluster_defaults = self.new_hvparams.get(hv_name, {})
4394 new_osp = objects.FillDict(cluster_defaults, hv_params)
4395 hv_class = hypervisor.GetHypervisorClass(hv_name)
4396 hv_class.CheckParameterSyntax(new_osp)
4397 _CheckHVParams(self, node_list, hv_name, new_osp)
4399 if self.op.default_iallocator:
4400 alloc_script = utils.FindFile(self.op.default_iallocator,
4401 constants.IALLOCATOR_SEARCH_PATH,
4403 if alloc_script is None:
4404 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405 " specified" % self.op.default_iallocator,
4408 def Exec(self, feedback_fn):
4409 """Change the parameters of the cluster.
4412 if self.op.vg_name is not None:
4413 new_volume = self.op.vg_name
4416 if new_volume != self.cfg.GetVGName():
4417 self.cfg.SetVGName(new_volume)
4419 feedback_fn("Cluster LVM configuration already in desired"
4420 " state, not changing")
4421 if self.op.drbd_helper is not None:
4422 new_helper = self.op.drbd_helper
4425 if new_helper != self.cfg.GetDRBDHelper():
4426 self.cfg.SetDRBDHelper(new_helper)
4428 feedback_fn("Cluster DRBD helper already in desired state,"
4430 if self.op.hvparams:
4431 self.cluster.hvparams = self.new_hvparams
4433 self.cluster.os_hvp = self.new_os_hvp
4434 if self.op.enabled_hypervisors is not None:
4435 self.cluster.hvparams = self.new_hvparams
4436 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437 if self.op.beparams:
4438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439 if self.op.nicparams:
4440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4442 self.cluster.ipolicy = self.new_ipolicy
4443 if self.op.osparams:
4444 self.cluster.osparams = self.new_osp
4445 if self.op.ndparams:
4446 self.cluster.ndparams = self.new_ndparams
4447 if self.op.diskparams:
4448 self.cluster.diskparams = self.new_diskparams
4449 if self.op.hv_state:
4450 self.cluster.hv_state_static = self.new_hv_state
4451 if self.op.disk_state:
4452 self.cluster.disk_state_static = self.new_disk_state
4454 if self.op.candidate_pool_size is not None:
4455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456 # we need to update the pool size here, otherwise the save will fail
4457 _AdjustCandidatePool(self, [])
4459 if self.op.maintain_node_health is not None:
4460 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461 feedback_fn("Note: CONFD was disabled at build time, node health"
4462 " maintenance is not useful (still enabling it)")
4463 self.cluster.maintain_node_health = self.op.maintain_node_health
4465 if self.op.prealloc_wipe_disks is not None:
4466 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4468 if self.op.add_uids is not None:
4469 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4471 if self.op.remove_uids is not None:
4472 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4474 if self.op.uid_pool is not None:
4475 self.cluster.uid_pool = self.op.uid_pool
4477 if self.op.default_iallocator is not None:
4478 self.cluster.default_iallocator = self.op.default_iallocator
4480 if self.op.reserved_lvs is not None:
4481 self.cluster.reserved_lvs = self.op.reserved_lvs
4483 if self.op.use_external_mip_script is not None:
4484 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4486 def helper_os(aname, mods, desc):
4488 lst = getattr(self.cluster, aname)
4489 for key, val in mods:
4490 if key == constants.DDM_ADD:
4492 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4495 elif key == constants.DDM_REMOVE:
4499 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4501 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4503 if self.op.hidden_os:
4504 helper_os("hidden_os", self.op.hidden_os, "hidden")
4506 if self.op.blacklisted_os:
4507 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4509 if self.op.master_netdev:
4510 master_params = self.cfg.GetMasterNetworkParameters()
4511 ems = self.cfg.GetUseExternalMipScript()
4512 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513 self.cluster.master_netdev)
4514 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4516 result.Raise("Could not disable the master ip")
4517 feedback_fn("Changing master_netdev from %s to %s" %
4518 (master_params.netdev, self.op.master_netdev))
4519 self.cluster.master_netdev = self.op.master_netdev
4521 if self.op.master_netmask:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524 result = self.rpc.call_node_change_master_netmask(master_params.name,
4525 master_params.netmask,
4526 self.op.master_netmask,
4528 master_params.netdev)
4530 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4533 self.cluster.master_netmask = self.op.master_netmask
4535 self.cfg.Update(self.cluster, feedback_fn)
4537 if self.op.master_netdev:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540 self.op.master_netdev)
4541 ems = self.cfg.GetUseExternalMipScript()
4542 result = self.rpc.call_node_activate_master_ip(master_params.name,
4545 self.LogWarning("Could not re-enable the master ip on"
4546 " the master, please restart manually: %s",
4550 def _UploadHelper(lu, nodes, fname):
4551 """Helper for uploading a file and showing warnings.
4554 if os.path.exists(fname):
4555 result = lu.rpc.call_upload_file(nodes, fname)
4556 for to_node, to_result in result.items():
4557 msg = to_result.fail_msg
4559 msg = ("Copy of file %s to node %s failed: %s" %
4560 (fname, to_node, msg))
4564 def _ComputeAncillaryFiles(cluster, redist):
4565 """Compute files external to Ganeti which need to be consistent.
4567 @type redist: boolean
4568 @param redist: Whether to include files which need to be redistributed
4571 # Compute files for all nodes
4573 pathutils.SSH_KNOWN_HOSTS_FILE,
4574 pathutils.CONFD_HMAC_KEY,
4575 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576 pathutils.SPICE_CERT_FILE,
4577 pathutils.SPICE_CACERT_FILE,
4578 pathutils.RAPI_USERS_FILE,
4582 # we need to ship at least the RAPI certificate
4583 files_all.add(pathutils.RAPI_CERT_FILE)
4585 files_all.update(pathutils.ALL_CERT_FILES)
4586 files_all.update(ssconf.SimpleStore().GetFileList())
4588 if cluster.modify_etc_hosts:
4589 files_all.add(pathutils.ETC_HOSTS)
4591 if cluster.use_external_mip_script:
4592 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4594 # Files which are optional; these must:
4595 # - be present in one other category as well
4596 # - either exist or not exist on all nodes of that category (mc, vm all)
4598 pathutils.RAPI_USERS_FILE,
4601 # Files which should only be on master candidates
4605 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4609 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4613 # Files which should only be on VM-capable nodes
4616 for hv_name in cluster.enabled_hypervisors
4618 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4626 # Filenames in each category must be unique
4627 all_files_set = files_all | files_mc | files_vm
4628 assert (len(all_files_set) ==
4629 sum(map(len, [files_all, files_mc, files_vm]))), \
4630 "Found file listed in more than one file list"
4632 # Optional files must be present in one other category
4633 assert all_files_set.issuperset(files_opt), \
4634 "Optional file not in a different required list"
4636 # This one file should never ever be re-distributed via RPC
4637 assert not (redist and
4638 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4640 return (files_all, files_opt, files_mc, files_vm)
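# Result sketch: the four sets group file names by scope, e.g. (illustrative)
#   files_all -> files every node needs (known_hosts, HMAC key, certificates)
#   files_opt -> files that may legitimately be absent (e.g. the RAPI users file)
#   files_mc  -> master-candidate-only files (the cluster configuration)
#   files_vm  -> hypervisor ancillary files for VM-capable nodes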
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644 """Distribute additional files which are part of the cluster configuration.
4646 ConfigWriter takes care of distributing the config and ssconf files, but
4647 there are more files which should be distributed to all nodes. This function
4648 makes sure those are copied.
4650 @param lu: calling logical unit
4651 @param additional_nodes: list of nodes not in the config to distribute to
4652 @type additional_vm: boolean
4653 @param additional_vm: whether the additional nodes are vm-capable or not
4656 # Gather target nodes
4657 cluster = lu.cfg.GetClusterInfo()
4658 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4660 online_nodes = lu.cfg.GetOnlineNodeList()
4661 online_set = frozenset(online_nodes)
4662 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4664 if additional_nodes is not None:
4665 online_nodes.extend(additional_nodes)
4667 vm_nodes.extend(additional_nodes)
4669 # Never distribute to master node
4670 for nodelist in [online_nodes, vm_nodes]:
4671 if master_info.name in nodelist:
4672 nodelist.remove(master_info.name)
4675 (files_all, _, files_mc, files_vm) = \
4676 _ComputeAncillaryFiles(cluster, True)
4678 # Never re-distribute configuration file from here
4679 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680 pathutils.CLUSTER_CONF_FILE in files_vm)
4681 assert not files_mc, "Master candidates not handled in this function"
4684 (online_nodes, files_all),
4685 (vm_nodes, files_vm),
4689 for (node_list, files) in filemap:
4691 _UploadHelper(lu, node_list, fname)
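# filemap pairs each target node list with the files it must receive: the
# common ancillary files go to every online node (minus the master, removed
# above), while the hypervisor-specific files go only to VM-capable nodes.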
4694 class LUClusterRedistConf(NoHooksLU):
4695 """Force the redistribution of cluster configuration.
4697 This is a very simple LU.
4702 def ExpandNames(self):
4703 self.needed_locks = {
4704 locking.LEVEL_NODE: locking.ALL_SET,
4705 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4707 self.share_locks = _ShareAll()
4709 def Exec(self, feedback_fn):
4710 """Redistribute the configuration.
4713 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714 _RedistributeAncillaryFiles(self)
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718 """Activate the master IP on the master node.
4721 def Exec(self, feedback_fn):
4722 """Activate the master IP.
4725 master_params = self.cfg.GetMasterNetworkParameters()
4726 ems = self.cfg.GetUseExternalMipScript()
4727 result = self.rpc.call_node_activate_master_ip(master_params.name,
4729 result.Raise("Could not activate the master IP")
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733 """Deactivate the master IP on the master node.
4736 def Exec(self, feedback_fn):
4737 """Deactivate the master IP.
4740 master_params = self.cfg.GetMasterNetworkParameters()
4741 ems = self.cfg.GetUseExternalMipScript()
4742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4744 result.Raise("Could not deactivate the master IP")
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748 """Sleep and poll for an instance's disk to sync.
4751 if not instance.disks or disks is not None and not disks:
4754 disks = _ExpandCheckDisks(instance, disks)
4757 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4759 node = instance.primary_node
4762 lu.cfg.SetDiskID(dev, node)
4764 # TODO: Convert to utils.Retry
4767 degr_retries = 10 # in seconds, as we sleep 1 second each time
4771 cumul_degraded = False
4772 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773 msg = rstats.fail_msg
4775 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4778 raise errors.RemoteError("Can't contact node %s for mirror data,"
4779 " aborting." % node)
4782 rstats = rstats.payload
4784 for i, mstat in enumerate(rstats):
4786 lu.LogWarning("Can't compute data for node %s/%s",
4787 node, disks[i].iv_name)
4790 cumul_degraded = (cumul_degraded or
4791 (mstat.is_degraded and mstat.sync_percent is None))
4792 if mstat.sync_percent is not None:
4794 if mstat.estimated_time is not None:
4795 rem_time = ("%s remaining (estimated)" %
4796 utils.FormatSeconds(mstat.estimated_time))
4797 max_time = mstat.estimated_time
4799 rem_time = "no time estimate"
4800 lu.LogInfo("- device %s: %5.2f%% done, %s",
4801 disks[i].iv_name, mstat.sync_percent, rem_time)
4803 # if we're done but degraded, let's do a few small retries, to
4804 # make sure we see a stable and not transient situation; therefore
4805 # we force restart of the loop
4806 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807 logging.info("Degraded disks found, %d retries left", degr_retries)
4815 time.sleep(min(60, max_time))
4818 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4820 return not cumul_degraded
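# Usage sketch (hypothetical caller): an instance-creating LU might do
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("Degraded disks found for this instance")
# With oneshot=True the status is polled and reported once (apart from the
# degraded-state retries) instead of waiting for the sync to complete.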
4823 def _BlockdevFind(lu, node, dev, instance):
4824 """Wrapper around call_blockdev_find to annotate diskparams.
4826 @param lu: A reference to the lu object
4827 @param node: The node to call out
4828 @param dev: The device to find
4829 @param instance: The instance object the device belongs to
4830 @returns The result of the rpc call
4833 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834 return lu.rpc.call_blockdev_find(node, disk)
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838 """Wrapper around L{_CheckDiskConsistencyInner}.
4841 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4848 """Check that mirrors are not degraded.
4850 @attention: The device has to be annotated already.
4852 The ldisk parameter, if True, will change the test from the
4853 is_degraded attribute (which represents overall non-ok status for
4854 the device(s)) to the ldisk (representing the local storage status).
4857 lu.cfg.SetDiskID(dev, node)
4861 if on_primary or dev.AssembleOnSecondary():
4862 rstats = lu.rpc.call_blockdev_find(node, dev)
4863 msg = rstats.fail_msg
4865 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4867 elif not rstats.payload:
4868 lu.LogWarning("Can't find disk on node %s", node)
4872 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4874 result = result and not rstats.payload.is_degraded
4877 for child in dev.children:
4878 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 class LUOobCommand(NoHooksLU):
4885 """Logical unit for OOB handling.
4889 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4891 def ExpandNames(self):
4892 """Gather locks we need.
4895 if self.op.node_names:
4896 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897 lock_names = self.op.node_names
4899 lock_names = locking.ALL_SET
4901 self.needed_locks = {
4902 locking.LEVEL_NODE: lock_names,
4905 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4907 if not self.op.node_names:
4908 # Acquire node allocation lock only if all nodes are affected
4909 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4911 def CheckPrereq(self):
4912 """Check prerequisites.
4915 - the node exists in the configuration
4918 Any errors are signaled by raising errors.OpPrereqError.
4922 self.master_node = self.cfg.GetMasterNode()
4924 assert self.op.power_delay >= 0.0
4926 if self.op.node_names:
4927 if (self.op.command in self._SKIP_MASTER and
4928 self.master_node in self.op.node_names):
4929 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4932 if master_oob_handler:
4933 additional_text = ("run '%s %s %s' if you want to operate on the"
4934 " master regardless") % (master_oob_handler,
4938 additional_text = "it does not support out-of-band operations"
4940 raise errors.OpPrereqError(("Operating on the master node %s is not"
4941 " allowed for %s; %s") %
4942 (self.master_node, self.op.command,
4943 additional_text), errors.ECODE_INVAL)
4945 self.op.node_names = self.cfg.GetNodeList()
4946 if self.op.command in self._SKIP_MASTER:
4947 self.op.node_names.remove(self.master_node)
4949 if self.op.command in self._SKIP_MASTER:
4950 assert self.master_node not in self.op.node_names
4952 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4954 raise errors.OpPrereqError("Node %s not found" % node_name,
4957 self.nodes.append(node)
4959 if (not self.op.ignore_status and
4960 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962 " not marked offline") % node_name,
4965 def Exec(self, feedback_fn):
4966 """Execute OOB and return result if we expect any.
4969 master_node = self.master_node
4972 for idx, node in enumerate(utils.NiceSort(self.nodes,
4973 key=lambda node: node.name)):
4974 node_entry = [(constants.RS_NORMAL, node.name)]
4975 ret.append(node_entry)
4977 oob_program = _SupportsOob(self.cfg, node)
4980 node_entry.append((constants.RS_UNAVAIL, None))
4983 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984 self.op.command, oob_program, node.name)
4985 result = self.rpc.call_run_oob(master_node, oob_program,
4986 self.op.command, node.name,
4990 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991 node.name, result.fail_msg)
4992 node_entry.append((constants.RS_NODATA, None))
4995 self._CheckPayload(result)
4996 except errors.OpExecError, err:
4997 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4999 node_entry.append((constants.RS_NODATA, None))
5001 if self.op.command == constants.OOB_HEALTH:
5002 # For health we should log important events
5003 for item, status in result.payload:
5004 if status in [constants.OOB_STATUS_WARNING,
5005 constants.OOB_STATUS_CRITICAL]:
5006 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007 item, node.name, status)
5009 if self.op.command == constants.OOB_POWER_ON:
5011 elif self.op.command == constants.OOB_POWER_OFF:
5012 node.powered = False
5013 elif self.op.command == constants.OOB_POWER_STATUS:
5014 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015 if powered != node.powered:
5016 logging.warning(("Recorded power state (%s) of node '%s' does not"
5017 " match actual power state (%s)"), node.powered,
5020 # For configuration changing commands we should update the node
5021 if self.op.command in (constants.OOB_POWER_ON,
5022 constants.OOB_POWER_OFF):
5023 self.cfg.Update(node, feedback_fn)
5025 node_entry.append((constants.RS_NORMAL, result.payload))
5027 if (self.op.command == constants.OOB_POWER_ON and
5028 idx < len(self.nodes) - 1):
5029 time.sleep(self.op.power_delay)
5033 def _CheckPayload(self, result):
5034 """Checks if the payload is valid.
5036 @param result: RPC result
5037 @raises errors.OpExecError: If payload is not valid
5041 if self.op.command == constants.OOB_HEALTH:
5042 if not isinstance(result.payload, list):
5043 errs.append("command 'health' is expected to return a list but got %s" %
5044 type(result.payload))
5046 for item, status in result.payload:
5047 if status not in constants.OOB_STATUSES:
5048 errs.append("health item '%s' has invalid status '%s'" %
5051 if self.op.command == constants.OOB_POWER_STATUS:
5052 if not isinstance(result.payload, dict):
5053 errs.append("power-status is expected to return a dict but got %s" %
5054 type(result.payload))
5056 if self.op.command in [
5057 constants.OOB_POWER_ON,
5058 constants.OOB_POWER_OFF,
5059 constants.OOB_POWER_CYCLE,
5061 if result.payload is not None:
5062 errs.append("%s is expected to not return payload but got '%s'" %
5063 (self.op.command, result.payload))
5066 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067 utils.CommaJoin(errs))
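# Expected payload shapes, for illustration:
#   "health"                  -> a list of (item, status) pairs, each status
#                                being one of constants.OOB_STATUSES
#   "power-status"            -> a dict (Exec reads its "powered" entry)
#   "power-on"/"power-off"/"power-cycle" -> no payload at all (None)
# Anything else adds an error string above and raises errors.OpExecError.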
5070 class _OsQuery(_QueryBase):
5071 FIELDS = query.OS_FIELDS
5073 def ExpandNames(self, lu):
5074 # Lock all nodes in shared mode
5075 # Temporary removal of locks, should be reverted later
5076 # TODO: reintroduce locks when they are lighter-weight
5077 lu.needed_locks = {}
5078 #self.share_locks[locking.LEVEL_NODE] = 1
5079 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5081 # The following variables interact with _QueryBase._GetNames
5083 self.wanted = self.names
5085 self.wanted = locking.ALL_SET
5087 self.do_locking = self.use_locking
5089 def DeclareLocks(self, lu, level):
5093 def _DiagnoseByOS(rlist):
5094 """Remaps a per-node return list into an a per-os per-node dictionary
5096 @param rlist: a map with node names as keys and OS objects as values
5099 @return: a dictionary with osnames as keys and as value another
5100 map, with nodes as keys and tuples of (path, status, diagnose,
5101 variants, parameters, api_versions) as values, eg::
5103 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104 (/srv/..., False, "invalid api")],
5105 "node2": [(/srv/..., True, "", [], [])]}
5110 # we build here the list of nodes that didn't fail the RPC (at RPC
5111 # level), so that nodes with a non-responding node daemon don't
5112 # make all OSes invalid
5113 good_nodes = [node_name for node_name in rlist
5114 if not rlist[node_name].fail_msg]
5115 for node_name, nr in rlist.items():
5116 if nr.fail_msg or not nr.payload:
5118 for (name, path, status, diagnose, variants,
5119 params, api_versions) in nr.payload:
5120 if name not in all_os:
5121 # build a list of nodes for this os containing empty lists
5122 # for each node in node_list
5124 for nname in good_nodes:
5125 all_os[name][nname] = []
5126 # convert params from [name, help] to (name, help)
5127 params = [tuple(v) for v in params]
5128 all_os[name][node_name].append((path, status, diagnose,
5129 variants, params, api_versions))
5132 def _GetQueryData(self, lu):
5133 """Computes the list of nodes and their attributes.
5136 # Locking is not used
5137 assert not (compat.any(lu.glm.is_owned(level)
5138 for level in locking.LEVELS
5139 if level != locking.LEVEL_CLUSTER) or
5140 self.do_locking or self.use_locking)
5142 valid_nodes = [node.name
5143 for node in lu.cfg.GetAllNodesInfo().values()
5144 if not node.offline and node.vm_capable]
5145 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146 cluster = lu.cfg.GetClusterInfo()
5150 for (os_name, os_data) in pol.items():
5151 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152 hidden=(os_name in cluster.hidden_os),
5153 blacklisted=(os_name in cluster.blacklisted_os))
5157 api_versions = set()
5159 for idx, osl in enumerate(os_data.values()):
5160 info.valid = bool(info.valid and osl and osl[0][1])
5164 (node_variants, node_params, node_api) = osl[0][3:6]
5167 variants.update(node_variants)
5168 parameters.update(node_params)
5169 api_versions.update(node_api)
5171 # Filter out inconsistent values
5172 variants.intersection_update(node_variants)
5173 parameters.intersection_update(node_params)
5174 api_versions.intersection_update(node_api)
5176 info.variants = list(variants)
5177 info.parameters = list(parameters)
5178 info.api_versions = list(api_versions)
5180 data[os_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5187 class LUOsDiagnose(NoHooksLU):
5188 """Logical unit for OS diagnose/query.
5194 def _BuildFilter(fields, names):
5195 """Builds a filter for querying OSes.
5198 name_filter = qlang.MakeSimpleFilter("name", names)
5200 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201 # respective field is not requested
5202 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203 for fname in ["hidden", "blacklisted"]
5204 if fname not in fields]
5205 if "valid" not in fields:
5206 status_filter.append([qlang.OP_TRUE, "valid"])
5209 status_filter.insert(0, qlang.OP_AND)
5211 status_filter = None
5213 if name_filter and status_filter:
5214 return [qlang.OP_AND, name_filter, status_filter]
5218 return status_filter
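# Example (hypothetical arguments): _BuildFilter(["name", "valid"], ["lenny"])
# returns roughly (qlang constants shown by name)
#   [OP_AND, [OP_OR, [OP_EQUAL, "name", "lenny"]],
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]]]]
# i.e. hidden/blacklisted OSes are filtered out because those fields were not
# requested, while no "valid" clause is added since "valid" was requested.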
5220 def CheckArguments(self):
5221 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222 self.op.output_fields, False)
5224 def ExpandNames(self):
5225 self.oq.ExpandNames(self)
5227 def Exec(self, feedback_fn):
5228 return self.oq.OldStyleQuery(self)
5231 class _ExtStorageQuery(_QueryBase):
5232 FIELDS = query.EXTSTORAGE_FIELDS
5234 def ExpandNames(self, lu):
5235 # Lock all nodes in shared mode
5236 # Temporary removal of locks, should be reverted later
5237 # TODO: reintroduce locks when they are lighter-weight
5238 lu.needed_locks = {}
5239 #self.share_locks[locking.LEVEL_NODE] = 1
5240 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5242 # The following variables interact with _QueryBase._GetNames
5244 self.wanted = self.names
5246 self.wanted = locking.ALL_SET
5248 self.do_locking = self.use_locking
5250 def DeclareLocks(self, lu, level):
5254 def _DiagnoseByProvider(rlist):
5255 """Remaps a per-node return list into an a per-provider per-node dictionary
5257 @param rlist: a map with node names as keys and ExtStorage objects as values
5260 @return: a dictionary with extstorage providers as keys and as
5261 value another map, with nodes as keys and tuples of
5262 (path, status, diagnose, parameters) as values, eg::
5264 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5265 "node2": [(/srv/..., False, "missing file")]
5266 "node3": [(/srv/..., True, "", [])]
5271 # we build here the list of nodes that didn't fail the RPC (at RPC
5272 # level), so that nodes with a non-responding node daemon don't
5273 # make all providers invalid
5274 good_nodes = [node_name for node_name in rlist
5275 if not rlist[node_name].fail_msg]
5276 for node_name, nr in rlist.items():
5277 if nr.fail_msg or not nr.payload:
5279 for (name, path, status, diagnose, params) in nr.payload:
5280 if name not in all_es:
5281 # build a list of nodes for this provider containing empty lists
5282 # for each node in node_list
5284 for nname in good_nodes:
5285 all_es[name][nname] = []
5286 # convert params from [name, help] to (name, help)
5287 params = [tuple(v) for v in params]
5288 all_es[name][node_name].append((path, status, diagnose, params))
5291 def _GetQueryData(self, lu):
5292 """Computes the list of nodes and their attributes.
5295 # Locking is not used
5296 assert not (compat.any(lu.glm.is_owned(level)
5297 for level in locking.LEVELS
5298 if level != locking.LEVEL_CLUSTER) or
5299 self.do_locking or self.use_locking)
5301 valid_nodes = [node.name
5302 for node in lu.cfg.GetAllNodesInfo().values()
5303 if not node.offline and node.vm_capable]
5304 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5308 nodegroup_list = lu.cfg.GetNodeGroupList()
5310 for (es_name, es_data) in pol.items():
5311 # For every provider compute the nodegroup validity.
5312 # To do this we need to check the validity of each node in es_data
5313 # and then construct the corresponding nodegroup dict:
5314 # { nodegroup1: status
5315 # nodegroup2: status
5318 for nodegroup in nodegroup_list:
5319 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5321 nodegroup_nodes = ndgrp.members
5322 nodegroup_name = ndgrp.name
5325 for node in nodegroup_nodes:
5326 if node in valid_nodes:
5327 if es_data[node] != []:
5328 node_status = es_data[node][0][1]
5329 node_statuses.append(node_status)
5331 node_statuses.append(False)
5333 if False in node_statuses:
5334 ndgrp_data[nodegroup_name] = False
5336 ndgrp_data[nodegroup_name] = True
5338 # Compute the provider's parameters
5340 for idx, esl in enumerate(es_data.values()):
5341 valid = bool(esl and esl[0][1])
5345 node_params = esl[0][3]
5348 parameters.update(node_params)
5350 # Filter out inconsistent values
5351 parameters.intersection_update(node_params)
5353 params = list(parameters)
5355 # Now fill all the info for this provider
5356 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5357 nodegroup_status=ndgrp_data,
5360 data[es_name] = info
5362 # Prepare data in requested order
5363 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5367 class LUExtStorageDiagnose(NoHooksLU):
5368 """Logical unit for ExtStorage diagnose/query.
5373 def CheckArguments(self):
5374 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5375 self.op.output_fields, False)
5377 def ExpandNames(self):
5378 self.eq.ExpandNames(self)
5380 def Exec(self, feedback_fn):
5381 return self.eq.OldStyleQuery(self)
5384 class LUNodeRemove(LogicalUnit):
5385 """Logical unit for removing a node.
5388 HPATH = "node-remove"
5389 HTYPE = constants.HTYPE_NODE
5391 def BuildHooksEnv(self):
5396 "OP_TARGET": self.op.node_name,
5397 "NODE_NAME": self.op.node_name,
5400 def BuildHooksNodes(self):
5401 """Build hooks nodes.
5403 This doesn't run on the target node in the pre phase as a failed
5404 node would then be impossible to remove.
5407 all_nodes = self.cfg.GetNodeList()
5409 all_nodes.remove(self.op.node_name)
5412 return (all_nodes, all_nodes)
5414 def CheckPrereq(self):
5415 """Check prerequisites.
5418 - the node exists in the configuration
5419 - it does not have primary or secondary instances
5420 - it's not the master
5422 Any errors are signaled by raising errors.OpPrereqError.
5425 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5426 node = self.cfg.GetNodeInfo(self.op.node_name)
5427 assert node is not None
5429 masternode = self.cfg.GetMasterNode()
5430 if node.name == masternode:
5431 raise errors.OpPrereqError("Node is the master node, failover to another"
5432 " node is required", errors.ECODE_INVAL)
5434 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5435 if node.name in instance.all_nodes:
5436 raise errors.OpPrereqError("Instance %s is still running on the node,"
5437 " please remove first" % instance_name,
5439 self.op.node_name = node.name
5442 def Exec(self, feedback_fn):
5443 """Removes the node from the cluster.
5447 logging.info("Stopping the node daemon and removing configs from node %s",
5450 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5452 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5455 # Promote nodes to master candidate as needed
5456 _AdjustCandidatePool(self, exceptions=[node.name])
5457 self.context.RemoveNode(node.name)
5459 # Run post hooks on the node before it's removed
5460 _RunPostHook(self, node.name)
5462 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5463 msg = result.fail_msg
5465 self.LogWarning("Errors encountered on the remote node while leaving"
5466 " the cluster: %s", msg)
5468 # Remove node from our /etc/hosts
5469 if self.cfg.GetClusterInfo().modify_etc_hosts:
5470 master_node = self.cfg.GetMasterNode()
5471 result = self.rpc.call_etc_hosts_modify(master_node,
5472 constants.ETC_HOSTS_REMOVE,
5474 result.Raise("Can't update hosts file with new host data")
5475 _RedistributeAncillaryFiles(self)
5478 class _NodeQuery(_QueryBase):
5479 FIELDS = query.NODE_FIELDS
5481 def ExpandNames(self, lu):
5482 lu.needed_locks = {}
5483 lu.share_locks = _ShareAll()
5486 self.wanted = _GetWantedNodes(lu, self.names)
5488 self.wanted = locking.ALL_SET
5490 self.do_locking = (self.use_locking and
5491 query.NQ_LIVE in self.requested_data)
5494 # If any non-static field is requested we need to lock the nodes
5495 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5496 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5498 def DeclareLocks(self, lu, level):
5501 def _GetQueryData(self, lu):
5502 """Computes the list of nodes and their attributes.
5505 all_info = lu.cfg.GetAllNodesInfo()
5507 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5509 # Gather data as requested
5510 if query.NQ_LIVE in self.requested_data:
5511 # filter out non-vm_capable nodes
5512 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5514 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5515 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5516 [lu.cfg.GetHypervisorType()], es_flags)
5517 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5518 for (name, nresult) in node_data.items()
5519 if not nresult.fail_msg and nresult.payload)
5523 if query.NQ_INST in self.requested_data:
5524 node_to_primary = dict([(name, set()) for name in nodenames])
5525 node_to_secondary = dict([(name, set()) for name in nodenames])
5527 inst_data = lu.cfg.GetAllInstancesInfo()
5529 for inst in inst_data.values():
5530 if inst.primary_node in node_to_primary:
5531 node_to_primary[inst.primary_node].add(inst.name)
5532 for secnode in inst.secondary_nodes:
5533 if secnode in node_to_secondary:
5534 node_to_secondary[secnode].add(inst.name)
5536 node_to_primary = None
5537 node_to_secondary = None
5539 if query.NQ_OOB in self.requested_data:
5540 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5541 for name, node in all_info.iteritems())
5545 if query.NQ_GROUP in self.requested_data:
5546 groups = lu.cfg.GetAllNodeGroupsInfo()
5550 return query.NodeQueryData([all_info[name] for name in nodenames],
5551 live_data, lu.cfg.GetMasterNode(),
5552 node_to_primary, node_to_secondary, groups,
5553 oob_support, lu.cfg.GetClusterInfo())
5556 class LUNodeQuery(NoHooksLU):
5557 """Logical unit for querying nodes.
5560 # pylint: disable=W0142
5563 def CheckArguments(self):
5564 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5565 self.op.output_fields, self.op.use_locking)
5567 def ExpandNames(self):
5568 self.nq.ExpandNames(self)
5570 def DeclareLocks(self, level):
5571 self.nq.DeclareLocks(self, level)
5573 def Exec(self, feedback_fn):
5574 return self.nq.OldStyleQuery(self)
5577 class LUNodeQueryvols(NoHooksLU):
5578 """Logical unit for getting volumes on node(s).
5582 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5583 _FIELDS_STATIC = utils.FieldSet("node")
5585 def CheckArguments(self):
5586 _CheckOutputFields(static=self._FIELDS_STATIC,
5587 dynamic=self._FIELDS_DYNAMIC,
5588 selected=self.op.output_fields)
5590 def ExpandNames(self):
5591 self.share_locks = _ShareAll()
5594 self.needed_locks = {
5595 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5598 self.needed_locks = {
5599 locking.LEVEL_NODE: locking.ALL_SET,
5600 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5603 def Exec(self, feedback_fn):
5604 """Computes the list of nodes and their attributes.
5607 nodenames = self.owned_locks(locking.LEVEL_NODE)
5608 volumes = self.rpc.call_node_volumes(nodenames)
5610 ilist = self.cfg.GetAllInstancesInfo()
5611 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5614 for node in nodenames:
5615 nresult = volumes[node]
5618 msg = nresult.fail_msg
5620 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5623 node_vols = sorted(nresult.payload,
5624 key=operator.itemgetter("dev"))
5626 for vol in node_vols:
5628 for field in self.op.output_fields:
5631 elif field == "phys":
5635 elif field == "name":
5637 elif field == "size":
5638 val = int(float(vol["size"]))
5639 elif field == "instance":
5640 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5642 raise errors.ParameterError(field)
5643 node_output.append(str(val))
5645 output.append(node_output)
5650 class LUNodeQueryStorage(NoHooksLU):
5651 """Logical unit for getting information on storage units on node(s).
5654 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5657 def CheckArguments(self):
5658 _CheckOutputFields(static=self._FIELDS_STATIC,
5659 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5660 selected=self.op.output_fields)
5662 def ExpandNames(self):
5663 self.share_locks = _ShareAll()
5666 self.needed_locks = {
5667 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5670 self.needed_locks = {
5671 locking.LEVEL_NODE: locking.ALL_SET,
5672 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5675 def Exec(self, feedback_fn):
5676 """Computes the list of nodes and their attributes.
5679 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5681 # Always get name to sort by
5682 if constants.SF_NAME in self.op.output_fields:
5683 fields = self.op.output_fields[:]
5685 fields = [constants.SF_NAME] + self.op.output_fields
5687 # Never ask for node or type as it's only known to the LU
5688 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5689 while extra in fields:
5690 fields.remove(extra)
5692 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5693 name_idx = field_idx[constants.SF_NAME]
5695 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5696 data = self.rpc.call_storage_list(self.nodes,
5697 self.op.storage_type, st_args,
5698 self.op.name, fields)
5702 for node in utils.NiceSort(self.nodes):
5703 nresult = data[node]
5707 msg = nresult.fail_msg
5709 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5712 rows = dict([(row[name_idx], row) for row in nresult.payload])
5714 for name in utils.NiceSort(rows.keys()):
5719 for field in self.op.output_fields:
5720 if field == constants.SF_NODE:
5722 elif field == constants.SF_TYPE:
5723 val = self.op.storage_type
5724 elif field in field_idx:
5725 val = row[field_idx[field]]
5727 raise errors.ParameterError(field)
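# Illustrative sketch (not part of the original module): the field-index
# dispatch used by the storage query above, shown with plain lists. The
# literal "node" and "type" fields stand in for constants.SF_NODE and
# constants.SF_TYPE; all names here are hypothetical.
def _ExampleSelectStorageFields(fields, row, node_name, storage_type,
                                output_fields):
  """Pick the requested columns out of one storage row (sketch only)."""
  # map each known field name to its column index in the row
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  out = []
  for field in output_fields:
    if field == "node":
      val = node_name
    elif field == "type":
      val = storage_type
    elif field in field_idx:
      val = row[field_idx[field]]
    else:
      raise ValueError("Unknown output field %r" % field)
    out.append(str(val))
  return out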
5736 class _InstanceQuery(_QueryBase):
5737 FIELDS = query.INSTANCE_FIELDS
5739 def ExpandNames(self, lu):
5740 lu.needed_locks = {}
5741 lu.share_locks = _ShareAll()
5744 self.wanted = _GetWantedInstances(lu, self.names)
5746 self.wanted = locking.ALL_SET
5748 self.do_locking = (self.use_locking and
5749 query.IQ_LIVE in self.requested_data)
5751 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5752 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5753 lu.needed_locks[locking.LEVEL_NODE] = []
5754 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5756 self.do_grouplocks = (self.do_locking and
5757 query.IQ_NODES in self.requested_data)
5759 def DeclareLocks(self, lu, level):
5761 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5762 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5764 # Lock all groups used by instances optimistically; this requires going
5765 # via the node before it's locked, requiring verification later on
5766 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5768 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5769 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5770 elif level == locking.LEVEL_NODE:
5771 lu._LockInstancesNodes() # pylint: disable=W0212
5774 def _CheckGroupLocks(lu):
5775 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5776 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5778 # Check if node groups for locked instances are still correct
5779 for instance_name in owned_instances:
5780 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5782 def _GetQueryData(self, lu):
5783 """Computes the list of instances and their attributes.
5786 if self.do_grouplocks:
5787 self._CheckGroupLocks(lu)
5789 cluster = lu.cfg.GetClusterInfo()
5790 all_info = lu.cfg.GetAllInstancesInfo()
5792 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5794 instance_list = [all_info[name] for name in instance_names]
5795 nodes = frozenset(itertools.chain(*(inst.all_nodes
5796 for inst in instance_list)))
5797 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5800 wrongnode_inst = set()
5802 # Gather data as requested
5803 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5805 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5807 result = node_data[name]
5809 # offline nodes will be in both lists
5810 assert result.fail_msg
5811 offline_nodes.append(name)
5813 bad_nodes.append(name)
5814 elif result.payload:
5815 for inst in result.payload:
5816 if inst in all_info:
5817 if all_info[inst].primary_node == name:
5818 live_data.update(result.payload)
5820 wrongnode_inst.add(inst)
5822 # orphan instance; we don't list it here as we don't
5823 # handle this case yet in the output of instance listing
5824 logging.warning("Orphan instance '%s' found on node %s",
5826 # else no instance is alive
5830 if query.IQ_DISKUSAGE in self.requested_data:
5831 gmi = ganeti.masterd.instance
5832 disk_usage = dict((inst.name,
5833 gmi.ComputeDiskSize(inst.disk_template,
5834 [{constants.IDISK_SIZE: disk.size}
5835 for disk in inst.disks]))
5836 for inst in instance_list)
5840 if query.IQ_CONSOLE in self.requested_data:
5842 for inst in instance_list:
5843 if inst.name in live_data:
5844 # Instance is running
5845 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5847 consinfo[inst.name] = None
5848 assert set(consinfo.keys()) == set(instance_names)
5852 if query.IQ_NODES in self.requested_data:
5853 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5855 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5856 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5857 for uuid in set(map(operator.attrgetter("group"),
5863 if query.IQ_NETWORKS in self.requested_data:
5864 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5865 for i in instance_list))
5866 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5870 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5871 disk_usage, offline_nodes, bad_nodes,
5872 live_data, wrongnode_inst, consinfo,
5873 nodes, groups, networks)
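# Illustrative sketch (not part of the original module): how live runtime
# data and "wrong node" instances can be separated, given per-node payloads
# of running instances and the configured primary node of each instance.
# The dictionaries are hypothetical stand-ins for the RPC results used above.
def _ExampleSplitLiveData(node_payloads, configured_primary):
  """Split runtime info into live data and wrongly-placed instances."""
  live_data = {}
  wrongnode_inst = set()
  for (node, payload) in node_payloads.items():
    for (inst, info) in payload.items():
      if inst not in configured_primary:
        # orphan instance, not listed here
        continue
      if configured_primary[inst] == node:
        live_data[inst] = info
      else:
        wrongnode_inst.add(inst)
  return (live_data, wrongnode_inst)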
5876 class LUQuery(NoHooksLU):
5877 """Query for resources/items of a certain kind.
5880 # pylint: disable=W0142
5883 def CheckArguments(self):
5884 qcls = _GetQueryImplementation(self.op.what)
5886 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5888 def ExpandNames(self):
5889 self.impl.ExpandNames(self)
5891 def DeclareLocks(self, level):
5892 self.impl.DeclareLocks(self, level)
5894 def Exec(self, feedback_fn):
5895 return self.impl.NewStyleQuery(self)
5898 class LUQueryFields(NoHooksLU):
5899 """Query for resources/items of a certain kind.
5902 # pylint: disable=W0142
5905 def CheckArguments(self):
5906 self.qcls = _GetQueryImplementation(self.op.what)
5908 def ExpandNames(self):
5909 self.needed_locks = {}
5911 def Exec(self, feedback_fn):
5912 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5915 class LUNodeModifyStorage(NoHooksLU):
5916 """Logical unit for modifying a storage volume on a node.
5921 def CheckArguments(self):
5922 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5924 storage_type = self.op.storage_type
5927 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5929 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5930 " modified" % storage_type,
5933 diff = set(self.op.changes.keys()) - modifiable
5935 raise errors.OpPrereqError("The following fields can not be modified for"
5936 " storage units of type '%s': %r" %
5937 (storage_type, list(diff)),
5940 def ExpandNames(self):
5941 self.needed_locks = {
5942 locking.LEVEL_NODE: self.op.node_name,
5945 def Exec(self, feedback_fn):
5946 """Computes the list of nodes and their attributes.
5949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5950 result = self.rpc.call_storage_modify(self.op.node_name,
5951 self.op.storage_type, st_args,
5952 self.op.name, self.op.changes)
5953 result.Raise("Failed to modify storage unit '%s' on %s" %
5954 (self.op.name, self.op.node_name))
5957 class LUNodeAdd(LogicalUnit):
5958 """Logical unit for adding node to the cluster.
5962 HTYPE = constants.HTYPE_NODE
5963 _NFLAGS = ["master_capable", "vm_capable"]
5965 def CheckArguments(self):
5966 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5967 # validate/normalize the node name
5968 self.hostname = netutils.GetHostname(name=self.op.node_name,
5969 family=self.primary_ip_family)
5970 self.op.node_name = self.hostname.name
5972 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5973 raise errors.OpPrereqError("Cannot readd the master node",
5976 if self.op.readd and self.op.group:
5977 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5978 " being readded", errors.ECODE_INVAL)
5980 def BuildHooksEnv(self):
5983 This will run on all nodes before, and on all nodes + the new node after.
5987 "OP_TARGET": self.op.node_name,
5988 "NODE_NAME": self.op.node_name,
5989 "NODE_PIP": self.op.primary_ip,
5990 "NODE_SIP": self.op.secondary_ip,
5991 "MASTER_CAPABLE": str(self.op.master_capable),
5992 "VM_CAPABLE": str(self.op.vm_capable),
5995 def BuildHooksNodes(self):
5996 """Build hooks nodes.
5999 # Exclude added node
6000 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6001 post_nodes = pre_nodes + [self.op.node_name, ]
6003 return (pre_nodes, post_nodes)
6005 def CheckPrereq(self):
6006 """Check prerequisites.
6009 - the new node is not already in the config
6011 - its parameters (single/dual homed) match the cluster
6013 Any errors are signaled by raising errors.OpPrereqError.
6017 hostname = self.hostname
6018 node = hostname.name
6019 primary_ip = self.op.primary_ip = hostname.ip
6020 if self.op.secondary_ip is None:
6021 if self.primary_ip_family == netutils.IP6Address.family:
6022 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6023 " IPv4 address must be given as secondary",
6025 self.op.secondary_ip = primary_ip
6027 secondary_ip = self.op.secondary_ip
6028 if not netutils.IP4Address.IsValid(secondary_ip):
6029 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6030 " address" % secondary_ip, errors.ECODE_INVAL)
6032 node_list = cfg.GetNodeList()
6033 if not self.op.readd and node in node_list:
6034 raise errors.OpPrereqError("Node %s is already in the configuration" %
6035 node, errors.ECODE_EXISTS)
6036 elif self.op.readd and node not in node_list:
6037 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6040 self.changed_primary_ip = False
6042 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6043 if self.op.readd and node == existing_node_name:
6044 if existing_node.secondary_ip != secondary_ip:
6045 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6046 " address configuration as before",
6048 if existing_node.primary_ip != primary_ip:
6049 self.changed_primary_ip = True
6053 if (existing_node.primary_ip == primary_ip or
6054 existing_node.secondary_ip == primary_ip or
6055 existing_node.primary_ip == secondary_ip or
6056 existing_node.secondary_ip == secondary_ip):
6057 raise errors.OpPrereqError("New node ip address(es) conflict with"
6058 " existing node %s" % existing_node.name,
6059 errors.ECODE_NOTUNIQUE)
6061 # After this 'if' block, None is no longer a valid value for the
6062 # _capable op attributes
6064 old_node = self.cfg.GetNodeInfo(node)
6065 assert old_node is not None, "Can't retrieve locked node %s" % node
6066 for attr in self._NFLAGS:
6067 if getattr(self.op, attr) is None:
6068 setattr(self.op, attr, getattr(old_node, attr))
6070 for attr in self._NFLAGS:
6071 if getattr(self.op, attr) is None:
6072 setattr(self.op, attr, True)
6074 if self.op.readd and not self.op.vm_capable:
6075 pri, sec = cfg.GetNodeInstances(node)
6077 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6078 " flag set to false, but it already holds"
6079 " instances" % node,
6082 # check that the type of the node (single versus dual homed) is the
6083 # same as for the master
6084 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6085 master_singlehomed = myself.secondary_ip == myself.primary_ip
6086 newbie_singlehomed = secondary_ip == primary_ip
6087 if master_singlehomed != newbie_singlehomed:
6088 if master_singlehomed:
6089 raise errors.OpPrereqError("The master has no secondary ip but the"
6090 " new node has one",
6093 raise errors.OpPrereqError("The master has a secondary ip but the"
6094 " new node doesn't have one",
6097 # checks reachability
6098 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6099 raise errors.OpPrereqError("Node not reachable by ping",
6100 errors.ECODE_ENVIRON)
6102 if not newbie_singlehomed:
6103 # check reachability from my secondary ip to newbie's secondary ip
6104 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6105 source=myself.secondary_ip):
6106 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6107 " based ping to node daemon port",
6108 errors.ECODE_ENVIRON)
6115 if self.op.master_capable:
6116 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6118 self.master_candidate = False
6121 self.new_node = old_node
6123 node_group = cfg.LookupNodeGroup(self.op.group)
6124 self.new_node = objects.Node(name=node,
6125 primary_ip=primary_ip,
6126 secondary_ip=secondary_ip,
6127 master_candidate=self.master_candidate,
6128 offline=False, drained=False,
6129 group=node_group, ndparams={})
6131 if self.op.ndparams:
6132 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6133 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6134 "node", "cluster or group")
6136 if self.op.hv_state:
6137 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6139 if self.op.disk_state:
6140 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6142 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6143 # it a property on the base class.
6144 rpcrunner = rpc.DnsOnlyRunner()
6145 result = rpcrunner.call_version([node])[node]
6146 result.Raise("Can't get version information from node %s" % node)
6147 if constants.PROTOCOL_VERSION == result.payload:
6148 logging.info("Communication to node %s fine, sw version %s match",
6149 node, result.payload)
6151 raise errors.OpPrereqError("Version mismatch master version %s,"
6152 " node version %s" %
6153 (constants.PROTOCOL_VERSION, result.payload),
6154 errors.ECODE_ENVIRON)
6156 vg_name = cfg.GetVGName()
6157 if vg_name is not None:
6158 vparams = {constants.NV_PVLIST: [vg_name]}
6159 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6160 cname = self.cfg.GetClusterName()
6161 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6162 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6164 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6165 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6167 def Exec(self, feedback_fn):
6168 """Adds the new node to the cluster.
6171 new_node = self.new_node
6172 node = new_node.name
6174 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6177 # We are adding a new node, so we assume it's powered
6178 new_node.powered = True
6180 # for re-adds, reset the offline/drained/master-candidate flags;
6181 # we need to reset here, otherwise offline would prevent RPC calls
6182 # later in the procedure; this also means that if the re-add
6183 # fails, we are left with a non-offlined, broken node
6185 new_node.drained = new_node.offline = False # pylint: disable=W0201
6186 self.LogInfo("Readding a node, the offline/drained flags were reset")
6187 # if we demote the node, we do cleanup later in the procedure
6188 new_node.master_candidate = self.master_candidate
6189 if self.changed_primary_ip:
6190 new_node.primary_ip = self.op.primary_ip
6192 # copy the master/vm_capable flags
6193 for attr in self._NFLAGS:
6194 setattr(new_node, attr, getattr(self.op, attr))
6196 # notify the user about any possible mc promotion
6197 if new_node.master_candidate:
6198 self.LogInfo("Node will be a master candidate")
6200 if self.op.ndparams:
6201 new_node.ndparams = self.op.ndparams
6203 new_node.ndparams = {}
6205 if self.op.hv_state:
6206 new_node.hv_state_static = self.new_hv_state
6208 if self.op.disk_state:
6209 new_node.disk_state_static = self.new_disk_state
6211 # Add node to our /etc/hosts, and add key to known_hosts
6212 if self.cfg.GetClusterInfo().modify_etc_hosts:
6213 master_node = self.cfg.GetMasterNode()
6214 result = self.rpc.call_etc_hosts_modify(master_node,
6215 constants.ETC_HOSTS_ADD,
6218 result.Raise("Can't update hosts file with new host data")
6220 if new_node.secondary_ip != new_node.primary_ip:
6221 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6224 node_verify_list = [self.cfg.GetMasterNode()]
6225 node_verify_param = {
6226 constants.NV_NODELIST: ([node], {}),
6227 # TODO: do a node-net-test as well?
6230 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6231 self.cfg.GetClusterName())
6232 for verifier in node_verify_list:
6233 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6234 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6236 for failed in nl_payload:
6237 feedback_fn("ssh/hostname verification failed"
6238 " (checking from %s): %s" %
6239 (verifier, nl_payload[failed]))
6240 raise errors.OpExecError("ssh/hostname verification failed")
6243 _RedistributeAncillaryFiles(self)
6244 self.context.ReaddNode(new_node)
6245 # make sure we redistribute the config
6246 self.cfg.Update(new_node, feedback_fn)
6247 # and make sure the new node will not have old files around
6248 if not new_node.master_candidate:
6249 result = self.rpc.call_node_demote_from_mc(new_node.name)
6250 msg = result.fail_msg
6252 self.LogWarning("Node failed to demote itself from master"
6253 " candidate status: %s" % msg)
6255 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6256 additional_vm=self.op.vm_capable)
6257 self.context.AddNode(new_node, self.proc.GetECId())
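# Illustrative sketch (not part of the original module): the pre/post hook
# node lists computed by LUNodeAdd.BuildHooksNodes above, using plain
# sequences; "node_list" and "new_node" are hypothetical inputs.
def _ExampleNodeAddHookNodes(node_list, new_node):
  """Return (pre_nodes, post_nodes) for a node-add operation (sketch)."""
  # pre-hooks run everywhere except on the node being added; post-hooks
  # additionally include the new node
  pre_nodes = list(set(node_list) - set([new_node]))
  post_nodes = pre_nodes + [new_node]
  return (pre_nodes, post_nodes)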
6260 class LUNodeSetParams(LogicalUnit):
6261 """Modifies the parameters of a node.
6263 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6264 to the node role (as _ROLE_*)
6265 @cvar _R2F: a dictionary from node role to tuples of flags
6266 @cvar _FLAGS: a list of attribute names corresponding to the flags
6269 HPATH = "node-modify"
6270 HTYPE = constants.HTYPE_NODE
6272 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6274 (True, False, False): _ROLE_CANDIDATE,
6275 (False, True, False): _ROLE_DRAINED,
6276 (False, False, True): _ROLE_OFFLINE,
6277 (False, False, False): _ROLE_REGULAR,
6279 _R2F = dict((v, k) for k, v in _F2R.items())
6280 _FLAGS = ["master_candidate", "drained", "offline"]
6282 def CheckArguments(self):
6283 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6284 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6285 self.op.master_capable, self.op.vm_capable,
6286 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6288 if all_mods.count(None) == len(all_mods):
6289 raise errors.OpPrereqError("Please pass at least one modification",
6291 if all_mods.count(True) > 1:
6292 raise errors.OpPrereqError("Can't set the node into more than one"
6293 " state at the same time",
6296 # Boolean value that tells us whether we might be demoting from MC
6297 self.might_demote = (self.op.master_candidate is False or
6298 self.op.offline is True or
6299 self.op.drained is True or
6300 self.op.master_capable is False)
6302 if self.op.secondary_ip:
6303 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6304 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6305 " address" % self.op.secondary_ip,
6308 self.lock_all = self.op.auto_promote and self.might_demote
6309 self.lock_instances = self.op.secondary_ip is not None
6311 def _InstanceFilter(self, instance):
6312 """Filter for getting affected instances.
6315 return (instance.disk_template in constants.DTS_INT_MIRROR and
6316 self.op.node_name in instance.all_nodes)
6318 def ExpandNames(self):
6320 self.needed_locks = {
6321 locking.LEVEL_NODE: locking.ALL_SET,
6323 # Block allocations when all nodes are locked
6324 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: self.op.node_name,
6331 # Since modifying a node can have severe effects on currently running
6332 # operations, the resource lock is at least acquired in shared mode
6333 self.needed_locks[locking.LEVEL_NODE_RES] = \
6334 self.needed_locks[locking.LEVEL_NODE]
6336 # Get all locks except nodes in shared mode; they are not used for anything
6337 # but read-only access
6338 self.share_locks = _ShareAll()
6339 self.share_locks[locking.LEVEL_NODE] = 0
6340 self.share_locks[locking.LEVEL_NODE_RES] = 0
6341 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6343 if self.lock_instances:
6344 self.needed_locks[locking.LEVEL_INSTANCE] = \
6345 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6347 def BuildHooksEnv(self):
6350 This runs on the master node.
6354 "OP_TARGET": self.op.node_name,
6355 "MASTER_CANDIDATE": str(self.op.master_candidate),
6356 "OFFLINE": str(self.op.offline),
6357 "DRAINED": str(self.op.drained),
6358 "MASTER_CAPABLE": str(self.op.master_capable),
6359 "VM_CAPABLE": str(self.op.vm_capable),
6362 def BuildHooksNodes(self):
6363 """Build hooks nodes.
6366 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6369 def CheckPrereq(self):
6370 """Check prerequisites.
6372 This only checks the instance list against the existing names.
6375 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6377 if self.lock_instances:
6378 affected_instances = \
6379 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6381 # Verify instance locks
6382 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6383 wanted_instances = frozenset(affected_instances.keys())
6384 if wanted_instances - owned_instances:
6385 raise errors.OpPrereqError("Instances affected by changing node %s's"
6386 " secondary IP address have changed since"
6387 " locks were acquired, wanted '%s', have"
6388 " '%s'; retry the operation" %
6390 utils.CommaJoin(wanted_instances),
6391 utils.CommaJoin(owned_instances)),
6394 affected_instances = None
6396 if (self.op.master_candidate is not None or
6397 self.op.drained is not None or
6398 self.op.offline is not None):
6399 # we can't change the master's node flags
6400 if self.op.node_name == self.cfg.GetMasterNode():
6401 raise errors.OpPrereqError("The master role can be changed"
6402 " only via master-failover",
6405 if self.op.master_candidate and not node.master_capable:
6406 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6407 " it a master candidate" % node.name,
6410 if self.op.vm_capable is False:
6411 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6413 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6414 " the vm_capable flag" % node.name,
6417 if node.master_candidate and self.might_demote and not self.lock_all:
6418 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6419 # check if after removing the current node, we're missing master candidates
6421 (mc_remaining, mc_should, _) = \
6422 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6423 if mc_remaining < mc_should:
6424 raise errors.OpPrereqError("Not enough master candidates, please"
6425 " pass auto promote option to allow"
6426 " promotion (--auto-promote or RAPI"
6427 " auto_promote=True)", errors.ECODE_STATE)
6429 self.old_flags = old_flags = (node.master_candidate,
6430 node.drained, node.offline)
6431 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6432 self.old_role = old_role = self._F2R[old_flags]
6434 # Check for ineffective changes
6435 for attr in self._FLAGS:
6436 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6437 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6438 setattr(self.op, attr, None)
6440 # Past this point, any flag change to False means a transition
6441 # away from the respective state, as only real changes are kept
6443 # TODO: We might query the real power state if it supports OOB
6444 if _SupportsOob(self.cfg, node):
6445 if self.op.offline is False and not (node.powered or
6446 self.op.powered is True):
6447 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6448 " offline status can be reset") %
6449 self.op.node_name, errors.ECODE_STATE)
6450 elif self.op.powered is not None:
6451 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6452 " as it does not support out-of-band"
6453 " handling") % self.op.node_name,
6456 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
6457 if (self.op.drained is False or self.op.offline is False or
6458 (self.op.master_capable and not node.master_capable)):
6459 if _DecideSelfPromotion(self):
6460 self.op.master_candidate = True
6461 self.LogInfo("Auto-promoting node to master candidate")
6463 # If we're no longer master capable, we'll demote ourselves from MC
6464 if self.op.master_capable is False and node.master_candidate:
6465 self.LogInfo("Demoting from master candidate")
6466 self.op.master_candidate = False
6469 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6470 if self.op.master_candidate:
6471 new_role = self._ROLE_CANDIDATE
6472 elif self.op.drained:
6473 new_role = self._ROLE_DRAINED
6474 elif self.op.offline:
6475 new_role = self._ROLE_OFFLINE
6476 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6477 # False is still in new flags, which means we're un-setting (the current) state
6479 new_role = self._ROLE_REGULAR
6480 else: # no new flags, nothing, keep old role
6483 self.new_role = new_role
6485 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6486 # Trying to transition out of offline status
6487 result = self.rpc.call_version([node.name])[node.name]
6489 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6490 " to report its version: %s" %
6491 (node.name, result.fail_msg),
6494 self.LogWarning("Transitioning node from offline to online state"
6495 " without using re-add. Please make sure the node"
6498 # When changing the secondary ip, verify if this is a single-homed to
6499 # multi-homed transition or vice versa, and apply the relevant checks
6501 if self.op.secondary_ip:
6502 # Ok even without locking, because this can't be changed by any LU
6503 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6504 master_singlehomed = master.secondary_ip == master.primary_ip
6505 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6506 if self.op.force and node.name == master.name:
6507 self.LogWarning("Transitioning from single-homed to multi-homed"
6508 " cluster; all nodes will require a secondary IP"
6511 raise errors.OpPrereqError("Changing the secondary ip on a"
6512 " single-homed cluster requires the"
6513 " --force option to be passed, and the"
6514 " target node to be the master",
6516 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6517 if self.op.force and node.name == master.name:
6518 self.LogWarning("Transitioning from multi-homed to single-homed"
6519 " cluster; secondary IP addresses will have to be"
6522 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6523 " same as the primary IP on a multi-homed"
6524 " cluster, unless the --force option is"
6525 " passed, and the target node is the"
6526 " master", errors.ECODE_INVAL)
6528 assert not (frozenset(affected_instances) -
6529 self.owned_locks(locking.LEVEL_INSTANCE))
6532 if affected_instances:
6533 msg = ("Cannot change secondary IP address: offline node has"
6534 " instances (%s) configured to use it" %
6535 utils.CommaJoin(affected_instances.keys()))
6536 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6538 # On online nodes, check that no instances are running, and that
6539 # the node has the new ip and we can reach it.
6540 for instance in affected_instances.values():
6541 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6542 msg="cannot change secondary ip")
6544 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6545 if master.name != node.name:
6546 # check reachability from master secondary ip to new secondary ip
6547 if not netutils.TcpPing(self.op.secondary_ip,
6548 constants.DEFAULT_NODED_PORT,
6549 source=master.secondary_ip):
6550 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6551 " based ping to node daemon port",
6552 errors.ECODE_ENVIRON)
6554 if self.op.ndparams:
6555 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6556 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6557 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6558 "node", "cluster or group")
6559 self.new_ndparams = new_ndparams
6561 if self.op.hv_state:
6562 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6563 self.node.hv_state_static)
6565 if self.op.disk_state:
6566 self.new_disk_state = \
6567 _MergeAndVerifyDiskState(self.op.disk_state,
6568 self.node.disk_state_static)
6570 def Exec(self, feedback_fn):
6575 old_role = self.old_role
6576 new_role = self.new_role
6580 if self.op.ndparams:
6581 node.ndparams = self.new_ndparams
6583 if self.op.powered is not None:
6584 node.powered = self.op.powered
6586 if self.op.hv_state:
6587 node.hv_state_static = self.new_hv_state
6589 if self.op.disk_state:
6590 node.disk_state_static = self.new_disk_state
6592 for attr in ["master_capable", "vm_capable"]:
6593 val = getattr(self.op, attr)
6595 setattr(node, attr, val)
6596 result.append((attr, str(val)))
6598 if new_role != old_role:
6599 # Tell the node to demote itself, if no longer MC and not offline
6600 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6601 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6603 self.LogWarning("Node failed to demote itself: %s", msg)
6605 new_flags = self._R2F[new_role]
6606 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6608 result.append((desc, str(nf)))
6609 (node.master_candidate, node.drained, node.offline) = new_flags
6611 # we locked all nodes, so we adjust the candidate pool before updating this node
6613 _AdjustCandidatePool(self, [node.name])
6615 if self.op.secondary_ip:
6616 node.secondary_ip = self.op.secondary_ip
6617 result.append(("secondary_ip", self.op.secondary_ip))
6619 # this will trigger configuration file update, if needed
6620 self.cfg.Update(node, feedback_fn)
6622 # this will trigger job queue propagation or cleanup if the mc flag changed
6624 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6625 self.context.ReaddNode(node)
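# Illustrative sketch (not part of the original module): the flag-tuple to
# role mapping used by LUNodeSetParams (_F2R/_R2F) and how a role change is
# turned back into per-flag updates. The role strings are local stand-ins
# for the _ROLE_* constants.
def _ExampleApplyRole(old_flags, new_role):
  """Map a new role back to its flag tuple and report changes (sketch).

  @param old_flags: (master_candidate, drained, offline) booleans
  @param new_role: one of "candidate", "drained", "offline", "regular"

  """
  role_to_flags = {
    "candidate": (True, False, False),
    "drained": (False, True, False),
    "offline": (False, False, True),
    "regular": (False, False, False),
    }
  flag_names = ["master_candidate", "drained", "offline"]
  new_flags = role_to_flags[new_role]
  # record only the flags that actually change
  changes = [(name, nf)
             for (of, nf, name) in zip(old_flags, new_flags, flag_names)
             if of != nf]
  return (new_flags, changes)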
6630 class LUNodePowercycle(NoHooksLU):
6631 """Powercycles a node.
6636 def CheckArguments(self):
6637 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6638 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6639 raise errors.OpPrereqError("The node is the master and the force"
6640 " parameter was not set",
6643 def ExpandNames(self):
6644 """Locking for PowercycleNode.
6646 This is a last-resort option and shouldn't block on other
6647 jobs. Therefore, we grab no locks.
6650 self.needed_locks = {}
6652 def Exec(self, feedback_fn):
6656 result = self.rpc.call_node_powercycle(self.op.node_name,
6657 self.cfg.GetHypervisorType())
6658 result.Raise("Failed to schedule the reboot")
6659 return result.payload
6662 class LUClusterQuery(NoHooksLU):
6663 """Query cluster configuration.
6668 def ExpandNames(self):
6669 self.needed_locks = {}
6671 def Exec(self, feedback_fn):
6672 """Return cluster config.
6675 cluster = self.cfg.GetClusterInfo()
6678 # Filter just for enabled hypervisors
6679 for os_name, hv_dict in cluster.os_hvp.items():
6680 os_hvp[os_name] = {}
6681 for hv_name, hv_params in hv_dict.items():
6682 if hv_name in cluster.enabled_hypervisors:
6683 os_hvp[os_name][hv_name] = hv_params
6685 # Convert ip_family to ip_version
6686 primary_ip_version = constants.IP4_VERSION
6687 if cluster.primary_ip_family == netutils.IP6Address.family:
6688 primary_ip_version = constants.IP6_VERSION
6691 "software_version": constants.RELEASE_VERSION,
6692 "protocol_version": constants.PROTOCOL_VERSION,
6693 "config_version": constants.CONFIG_VERSION,
6694 "os_api_version": max(constants.OS_API_VERSIONS),
6695 "export_version": constants.EXPORT_VERSION,
6696 "architecture": runtime.GetArchInfo(),
6697 "name": cluster.cluster_name,
6698 "master": cluster.master_node,
6699 "default_hypervisor": cluster.primary_hypervisor,
6700 "enabled_hypervisors": cluster.enabled_hypervisors,
6701 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6702 for hypervisor_name in cluster.enabled_hypervisors]),
6704 "beparams": cluster.beparams,
6705 "osparams": cluster.osparams,
6706 "ipolicy": cluster.ipolicy,
6707 "nicparams": cluster.nicparams,
6708 "ndparams": cluster.ndparams,
6709 "diskparams": cluster.diskparams,
6710 "candidate_pool_size": cluster.candidate_pool_size,
6711 "master_netdev": cluster.master_netdev,
6712 "master_netmask": cluster.master_netmask,
6713 "use_external_mip_script": cluster.use_external_mip_script,
6714 "volume_group_name": cluster.volume_group_name,
6715 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6716 "file_storage_dir": cluster.file_storage_dir,
6717 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6718 "maintain_node_health": cluster.maintain_node_health,
6719 "ctime": cluster.ctime,
6720 "mtime": cluster.mtime,
6721 "uuid": cluster.uuid,
6722 "tags": list(cluster.GetTags()),
6723 "uid_pool": cluster.uid_pool,
6724 "default_iallocator": cluster.default_iallocator,
6725 "reserved_lvs": cluster.reserved_lvs,
6726 "primary_ip_version": primary_ip_version,
6727 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6728 "hidden_os": cluster.hidden_os,
6729 "blacklisted_os": cluster.blacklisted_os,
6735 class LUClusterConfigQuery(NoHooksLU):
6736 """Return configuration values.
6741 def CheckArguments(self):
6742 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6744 def ExpandNames(self):
6745 self.cq.ExpandNames(self)
6747 def DeclareLocks(self, level):
6748 self.cq.DeclareLocks(self, level)
6750 def Exec(self, feedback_fn):
6751 result = self.cq.OldStyleQuery(self)
6753 assert len(result) == 1
6758 class _ClusterQuery(_QueryBase):
6759 FIELDS = query.CLUSTER_FIELDS
6761 #: Do not sort (there is only one item)
6764 def ExpandNames(self, lu):
6765 lu.needed_locks = {}
6767 # The following variables interact with _QueryBase._GetNames
6768 self.wanted = locking.ALL_SET
6769 self.do_locking = self.use_locking
6772 raise errors.OpPrereqError("Can not use locking for cluster queries",
6775 def DeclareLocks(self, lu, level):
6778 def _GetQueryData(self, lu):
6779 """Computes the list of nodes and their attributes.
6782 # Locking is not used
6783 assert not (compat.any(lu.glm.is_owned(level)
6784 for level in locking.LEVELS
6785 if level != locking.LEVEL_CLUSTER) or
6786 self.do_locking or self.use_locking)
6788 if query.CQ_CONFIG in self.requested_data:
6789 cluster = lu.cfg.GetClusterInfo()
6791 cluster = NotImplemented
6793 if query.CQ_QUEUE_DRAINED in self.requested_data:
6794 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6796 drain_flag = NotImplemented
6798 if query.CQ_WATCHER_PAUSE in self.requested_data:
6799 master_name = lu.cfg.GetMasterNode()
6801 result = lu.rpc.call_get_watcher_pause(master_name)
6802 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6805 watcher_pause = result.payload
6807 watcher_pause = NotImplemented
6809 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
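# Illustrative sketch (not part of the original module): the gathering
# pattern used by the *_Query classes above, where each piece of data is
# computed only when its key appears in requested_data and is otherwise left
# as NotImplemented. "collectors" and its keys are hypothetical.
def _ExampleGatherRequested(requested_data, collectors):
  """Run only the collectors whose key was requested (sketch).

  @param requested_data: set of requested data keys
  @param collectors: dict mapping data key to a no-argument callable

  """
  return dict((key, fn() if key in requested_data else NotImplemented)
              for (key, fn) in collectors.items())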
6812 class LUInstanceActivateDisks(NoHooksLU):
6813 """Bring up an instance's disks.
6818 def ExpandNames(self):
6819 self._ExpandAndLockInstance()
6820 self.needed_locks[locking.LEVEL_NODE] = []
6821 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6823 def DeclareLocks(self, level):
6824 if level == locking.LEVEL_NODE:
6825 self._LockInstancesNodes()
6827 def CheckPrereq(self):
6828 """Check prerequisites.
6830 This checks that the instance is in the cluster.
6833 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6834 assert self.instance is not None, \
6835 "Cannot retrieve locked instance %s" % self.op.instance_name
6836 _CheckNodeOnline(self, self.instance.primary_node)
6838 def Exec(self, feedback_fn):
6839 """Activate the disks.
6842 disks_ok, disks_info = \
6843 _AssembleInstanceDisks(self, self.instance,
6844 ignore_size=self.op.ignore_size)
6846 raise errors.OpExecError("Cannot activate block devices")
6848 if self.op.wait_for_sync:
6849 if not _WaitForSync(self, self.instance):
6850 raise errors.OpExecError("Some disks of the instance are degraded!")
6855 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6857 """Prepare the block devices for an instance.
6859 This sets up the block devices on all nodes.
6861 @type lu: L{LogicalUnit}
6862 @param lu: the logical unit on whose behalf we execute
6863 @type instance: L{objects.Instance}
6864 @param instance: the instance for whose disks we assemble
6865 @type disks: list of L{objects.Disk} or None
6866 @param disks: which disks to assemble (or all, if None)
6867 @type ignore_secondaries: boolean
6868 @param ignore_secondaries: if true, errors on secondary nodes
6869 won't result in an error return from the function
6870 @type ignore_size: boolean
6871 @param ignore_size: if true, the current known size of the disk
6872 will not be used during the disk activation, useful for cases
6873 when the size is wrong
6874 @return: False if the operation failed, otherwise a list of
6875 (host, instance_visible_name, node_visible_name)
6876 with the mapping from node devices to instance devices
6881 iname = instance.name
6882 disks = _ExpandCheckDisks(instance, disks)
6884 # With the two-pass mechanism we try to reduce the window of
6885 # opportunity for the race condition of switching DRBD to primary
6886 # before handshaking occurred, but we do not eliminate it
6888 # The proper fix would be to wait (with some limits) until the
6889 # connection has been made and drbd transitions from WFConnection
6890 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6893 # 1st pass, assemble on all nodes in secondary mode
6894 for idx, inst_disk in enumerate(disks):
6895 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6897 node_disk = node_disk.Copy()
6898 node_disk.UnsetSize()
6899 lu.cfg.SetDiskID(node_disk, node)
6900 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6902 msg = result.fail_msg
6904 is_offline_secondary = (node in instance.secondary_nodes and
6906 lu.LogWarning("Could not prepare block device %s on node %s"
6907 " (is_primary=False, pass=1): %s",
6908 inst_disk.iv_name, node, msg)
6909 if not (ignore_secondaries or is_offline_secondary):
6912 # FIXME: race condition on drbd migration to primary
6914 # 2nd pass, do only the primary node
6915 for idx, inst_disk in enumerate(disks):
6918 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6919 if node != instance.primary_node:
6922 node_disk = node_disk.Copy()
6923 node_disk.UnsetSize()
6924 lu.cfg.SetDiskID(node_disk, node)
6925 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6927 msg = result.fail_msg
6929 lu.LogWarning("Could not prepare block device %s on node %s"
6930 " (is_primary=True, pass=2): %s",
6931 inst_disk.iv_name, node, msg)
6934 dev_path = result.payload
6936 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6938 # leave the disks configured for the primary node
6939 # this is a workaround that would be fixed better by
6940 # improving the logical/physical id handling
6942 lu.cfg.SetDiskID(disk, instance.primary_node)
6944 return disks_ok, device_info
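# Illustrative sketch (not part of the original module): the two-pass order
# used by _AssembleInstanceDisks above; first every node brings the device
# up in secondary mode so DRBD peers can handshake, then only the primary
# node switches it to primary. "assemble_fn" is a hypothetical callback
# standing in for the call_blockdev_assemble RPC.
def _ExampleTwoPassAssemble(primary_node, all_nodes, disks, assemble_fn):
  """Assemble disks in two passes (sketch only)."""
  # 1st pass: secondary mode everywhere
  for disk in disks:
    for node in all_nodes:
      assemble_fn(node, disk, as_primary=False)
  # 2nd pass: promote only on the primary node
  for disk in disks:
    assemble_fn(primary_node, disk, as_primary=True)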
6947 def _StartInstanceDisks(lu, instance, force):
6948 """Start the disks of an instance.
6951 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6952 ignore_secondaries=force)
6954 _ShutdownInstanceDisks(lu, instance)
6955 if force is not None and not force:
6957 hint=("If the message above refers to a secondary node,"
6958 " you can retry the operation using '--force'"))
6959 raise errors.OpExecError("Disk consistency error")
6962 class LUInstanceDeactivateDisks(NoHooksLU):
6963 """Shutdown an instance's disks.
6968 def ExpandNames(self):
6969 self._ExpandAndLockInstance()
6970 self.needed_locks[locking.LEVEL_NODE] = []
6971 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6973 def DeclareLocks(self, level):
6974 if level == locking.LEVEL_NODE:
6975 self._LockInstancesNodes()
6977 def CheckPrereq(self):
6978 """Check prerequisites.
6980 This checks that the instance is in the cluster.
6983 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6984 assert self.instance is not None, \
6985 "Cannot retrieve locked instance %s" % self.op.instance_name
6987 def Exec(self, feedback_fn):
6988 """Deactivate the disks
6991 instance = self.instance
6993 _ShutdownInstanceDisks(self, instance)
6995 _SafeShutdownInstanceDisks(self, instance)
6998 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6999 """Shutdown block devices of an instance.
7001 This function checks if an instance is running, before calling
7002 _ShutdownInstanceDisks.
7005 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7006 _ShutdownInstanceDisks(lu, instance, disks=disks)
7009 def _ExpandCheckDisks(instance, disks):
7010 """Return the instance disks selected by the disks list
7012 @type disks: list of L{objects.Disk} or None
7013 @param disks: selected disks
7014 @rtype: list of L{objects.Disk}
7015 @return: selected instance disks to act on
7019 return instance.disks
7021 if not set(disks).issubset(instance.disks):
7022 raise errors.ProgrammerError("Can only act on disks belonging to the"
7027 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7028 """Shutdown block devices of an instance.
7030 This does the shutdown on all nodes of the instance.
7032 If ignore_primary is false, errors on the primary node are ignored.
7037 disks = _ExpandCheckDisks(instance, disks)
7040 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7041 lu.cfg.SetDiskID(top_disk, node)
7042 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7043 msg = result.fail_msg
7045 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7046 disk.iv_name, node, msg)
7047 if ((node == instance.primary_node and not ignore_primary) or
7048 (node != instance.primary_node and not result.offline)):
7053 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7054 """Checks if a node has enough free memory.
7056 This function checks if a given node has the needed amount of free
7057 memory. In case the node has less memory or we cannot get the
7058 information from the node, this function raises an OpPrereqError exception.
7061 @type lu: C{LogicalUnit}
7062 @param lu: a logical unit from which we get configuration data
7064 @param node: the node to check
7065 @type reason: C{str}
7066 @param reason: string to use in the error message
7067 @type requested: C{int}
7068 @param requested: the amount of memory in MiB to check for
7069 @type hypervisor_name: C{str}
7070 @param hypervisor_name: the hypervisor to ask for memory stats
7072 @return: node current free memory
7073 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7074 we cannot check the node
7077 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7078 nodeinfo[node].Raise("Can't get data from node %s" % node,
7079 prereq=True, ecode=errors.ECODE_ENVIRON)
7080 (_, _, (hv_info, )) = nodeinfo[node].payload
7082 free_mem = hv_info.get("memory_free", None)
7083 if not isinstance(free_mem, int):
7084 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7085 " was '%s'" % (node, free_mem),
7086 errors.ECODE_ENVIRON)
7087 if requested > free_mem:
7088 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7089 " needed %s MiB, available %s MiB" %
7090 (node, reason, requested, free_mem),
7095 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7096 """Checks if nodes have enough free disk space in all the VGs.
7098 This function checks if all given nodes have the needed amount of
7099 free disk. In case any node has less disk or we cannot get the
7100 information from the node, this function raises an OpPrereqError exception.
7103 @type lu: C{LogicalUnit}
7104 @param lu: a logical unit from which we get configuration data
7105 @type nodenames: C{list}
7106 @param nodenames: the list of node names to check
7107 @type req_sizes: C{dict}
7108 @param req_sizes: the hash of vg and corresponding amount of disk in
7110 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7111 or we cannot check the node
7114 for vg, req_size in req_sizes.items():
7115 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
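# Illustrative sketch (not part of the original module): checking a
# per-volume-group size requirement against reported free space, in the
# spirit of _CheckNodesFreeDiskPerVG/_CheckNodesFreeDiskOnVG above. The
# dictionaries used here are hypothetical stand-ins for the RPC payloads.
def _ExampleCheckFreeDisk(free_by_node_and_vg, req_sizes):
  """Verify every node has enough free space in every requested VG (sketch).

  @param free_by_node_and_vg: dict of node name to a dict of vg name to
    free space in MiB
  @param req_sizes: dict of vg name to required space in MiB

  """
  for (vg, requested) in req_sizes.items():
    for (node, free_by_vg) in free_by_node_and_vg.items():
      vg_free = free_by_vg.get(vg)
      if not isinstance(vg_free, int):
        raise ValueError("Can't compute free space for vg %s on node %s" %
                         (vg, node))
      if requested > vg_free:
        raise ValueError("Not enough space on node %s, vg %s:"
                         " required %d MiB, available %d MiB" %
                         (node, vg, requested, vg_free))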
7118 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7119 """Checks if nodes have enough free disk space in the specified VG.
7121 This function checks if all given nodes have the needed amount of
7122 free disk. In case any node has less disk or we cannot get the
7123 information from the node, this function raises an OpPrereqError exception.
7126 @type lu: C{LogicalUnit}
7127 @param lu: a logical unit from which we get configuration data
7128 @type nodenames: C{list}
7129 @param nodenames: the list of node names to check
7131 @param vg: the volume group to check
7132 @type requested: C{int}
7133 @param requested: the amount of disk in MiB to check for
7134 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7135 or we cannot check the node
7138 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7139 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7140 for node in nodenames:
7141 info = nodeinfo[node]
7142 info.Raise("Cannot get current information from node %s" % node,
7143 prereq=True, ecode=errors.ECODE_ENVIRON)
7144 (_, (vg_info, ), _) = info.payload
7145 vg_free = vg_info.get("vg_free", None)
7146 if not isinstance(vg_free, int):
7147 raise errors.OpPrereqError("Can't compute free disk space on node"
7148 " %s for vg %s, result was '%s'" %
7149 (node, vg, vg_free), errors.ECODE_ENVIRON)
7150 if requested > vg_free:
7151 raise errors.OpPrereqError("Not enough disk space on target node %s"
7152 " vg %s: required %d MiB, available %d MiB" %
7153 (node, vg, requested, vg_free),
7157 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7158 """Checks if nodes have enough physical CPUs
7160 This function checks if all given nodes have the needed number of
7161 physical CPUs. In case any node has less CPUs or we cannot get the
7162 information from the node, this function raises an OpPrereqError exception.
7165 @type lu: C{LogicalUnit}
7166 @param lu: a logical unit from which we get configuration data
7167 @type nodenames: C{list}
7168 @param nodenames: the list of node names to check
7169 @type requested: C{int}
7170 @param requested: the minimum acceptable number of physical CPUs
7171 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7172 or we cannot check the node
7175 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7176 for node in nodenames:
7177 info = nodeinfo[node]
7178 info.Raise("Cannot get current information from node %s" % node,
7179 prereq=True, ecode=errors.ECODE_ENVIRON)
7180 (_, _, (hv_info, )) = info.payload
7181 num_cpus = hv_info.get("cpu_total", None)
7182 if not isinstance(num_cpus, int):
7183 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7184 " on node %s, result was '%s'" %
7185 (node, num_cpus), errors.ECODE_ENVIRON)
7186 if requested > num_cpus:
7187 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7188 "required" % (node, num_cpus, requested),
7192 class LUInstanceStartup(LogicalUnit):
7193 """Starts an instance.
7196 HPATH = "instance-start"
7197 HTYPE = constants.HTYPE_INSTANCE
7200 def CheckArguments(self):
7202 if self.op.beparams:
7203 # fill the beparams dict
7204 objects.UpgradeBeParams(self.op.beparams)
7205 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7207 def ExpandNames(self):
7208 self._ExpandAndLockInstance()
7209 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7211 def DeclareLocks(self, level):
7212 if level == locking.LEVEL_NODE_RES:
7213 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7215 def BuildHooksEnv(self):
7218 This runs on master, primary and secondary nodes of the instance.
7222 "FORCE": self.op.force,
7225 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7229 def BuildHooksNodes(self):
7230 """Build hooks nodes.
7233 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7236 def CheckPrereq(self):
7237 """Check prerequisites.
7239 This checks that the instance is in the cluster.
7242 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7243 assert self.instance is not None, \
7244 "Cannot retrieve locked instance %s" % self.op.instance_name
7247 if self.op.hvparams:
7248 # check hypervisor parameter syntax (locally)
7249 cluster = self.cfg.GetClusterInfo()
7250 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7251 filled_hvp = cluster.FillHV(instance)
7252 filled_hvp.update(self.op.hvparams)
7253 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7254 hv_type.CheckParameterSyntax(filled_hvp)
7255 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7257 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7259 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7261 if self.primary_offline and self.op.ignore_offline_nodes:
7262 self.LogWarning("Ignoring offline primary node")
7264 if self.op.hvparams or self.op.beparams:
7265 self.LogWarning("Overridden parameters are ignored")
7267 _CheckNodeOnline(self, instance.primary_node)
7269 bep = self.cfg.GetClusterInfo().FillBE(instance)
7270 bep.update(self.op.beparams)
7272 # check bridges existence
7273 _CheckInstanceBridgesExist(self, instance)
7275 remote_info = self.rpc.call_instance_info(instance.primary_node,
7277 instance.hypervisor)
7278 remote_info.Raise("Error checking node %s" % instance.primary_node,
7279 prereq=True, ecode=errors.ECODE_ENVIRON)
7280 if not remote_info.payload: # not running already
7281 _CheckNodeFreeMemory(self, instance.primary_node,
7282 "starting instance %s" % instance.name,
7283 bep[constants.BE_MINMEM], instance.hypervisor)
7285 def Exec(self, feedback_fn):
7286 """Start the instance.
7289 instance = self.instance
7290 force = self.op.force
7292 if not self.op.no_remember:
7293 self.cfg.MarkInstanceUp(instance.name)
7295 if self.primary_offline:
7296 assert self.op.ignore_offline_nodes
7297 self.LogInfo("Primary node offline, marked instance as started")
7299 node_current = instance.primary_node
7301 _StartInstanceDisks(self, instance, force)
7304 self.rpc.call_instance_start(node_current,
7305 (instance, self.op.hvparams,
7307 self.op.startup_paused)
7308 msg = result.fail_msg
7310 _ShutdownInstanceDisks(self, instance)
7311 raise errors.OpExecError("Could not start instance: %s" % msg)
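# Illustrative sketch (not part of the original module): the start-up flow
# of LUInstanceStartup.Exec above, reduced to its control flow; the
# callbacks are hypothetical stand-ins for the configuration update, disk
# assembly and RPC calls, and "start_fn" is assumed to return an error
# message or None.
def _ExampleStartInstance(mark_up_fn, start_disks_fn, start_fn,
                          shutdown_disks_fn):
  """Start an instance, rolling back disks if the start fails (sketch)."""
  mark_up_fn()           # record the intended state first
  start_disks_fn()       # bring up the block devices
  error = start_fn()     # ask the node to start the instance
  if error:
    shutdown_disks_fn()  # don't leave disks active on failure
    raise RuntimeError("Could not start instance: %s" % error)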
7314 class LUInstanceReboot(LogicalUnit):
7315 """Reboot an instance.
7318 HPATH = "instance-reboot"
7319 HTYPE = constants.HTYPE_INSTANCE
7322 def ExpandNames(self):
7323 self._ExpandAndLockInstance()
7325 def BuildHooksEnv(self):
7328 This runs on master, primary and secondary nodes of the instance.
7332 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7333 "REBOOT_TYPE": self.op.reboot_type,
7334 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7337 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7341 def BuildHooksNodes(self):
7342 """Build hooks nodes.
7345 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7348 def CheckPrereq(self):
7349 """Check prerequisites.
7351 This checks that the instance is in the cluster.
7354 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7355 assert self.instance is not None, \
7356 "Cannot retrieve locked instance %s" % self.op.instance_name
7357 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7358 _CheckNodeOnline(self, instance.primary_node)
7360 # check bridges existence
7361 _CheckInstanceBridgesExist(self, instance)
7363 def Exec(self, feedback_fn):
7364 """Reboot the instance.
7367 instance = self.instance
7368 ignore_secondaries = self.op.ignore_secondaries
7369 reboot_type = self.op.reboot_type
7371 remote_info = self.rpc.call_instance_info(instance.primary_node,
7373 instance.hypervisor)
7374 remote_info.Raise("Error checking node %s" % instance.primary_node)
7375 instance_running = bool(remote_info.payload)
7377 node_current = instance.primary_node
7379 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7380 constants.INSTANCE_REBOOT_HARD]:
7381 for disk in instance.disks:
7382 self.cfg.SetDiskID(disk, node_current)
7383 result = self.rpc.call_instance_reboot(node_current, instance,
7385 self.op.shutdown_timeout)
7386 result.Raise("Could not reboot instance")
7388 if instance_running:
7389 result = self.rpc.call_instance_shutdown(node_current, instance,
7390 self.op.shutdown_timeout)
7391 result.Raise("Could not shutdown instance for full reboot")
7392 _ShutdownInstanceDisks(self, instance)
7394 self.LogInfo("Instance %s was already stopped, starting now",
7396 _StartInstanceDisks(self, instance, ignore_secondaries)
7397 result = self.rpc.call_instance_start(node_current,
7398 (instance, None, None), False)
7399 msg = result.fail_msg
7401 _ShutdownInstanceDisks(self, instance)
7402 raise errors.OpExecError("Could not start instance for"
7403 " full reboot: %s" % msg)
7405 self.cfg.MarkInstanceUp(instance.name)
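# Illustrative sketch (not part of the original module): the decision made
# by LUInstanceReboot.Exec above. Soft/hard reboots of a running instance
# are delegated to the hypervisor; anything else becomes a full stop/start
# cycle. The reboot-type strings and callbacks here are hypothetical
# stand-ins for the INSTANCE_REBOOT_* constants and RPC calls.
def _ExampleReboot(instance_running, reboot_type, hypervisor_reboot_fn,
                   full_stop_fn, full_start_fn):
  """Choose between a hypervisor reboot and a full restart (sketch)."""
  if instance_running and reboot_type in ("soft", "hard"):
    hypervisor_reboot_fn()
  else:
    if instance_running:
      full_stop_fn()    # shut the instance and its disks down first
    full_start_fn()     # then (re)start the disks and the instance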
7408 class LUInstanceShutdown(LogicalUnit):
7409 """Shutdown an instance.
7412 HPATH = "instance-stop"
7413 HTYPE = constants.HTYPE_INSTANCE
7416 def ExpandNames(self):
7417 self._ExpandAndLockInstance()
7419 def BuildHooksEnv(self):
7422 This runs on master, primary and secondary nodes of the instance.
7425 env = _BuildInstanceHookEnvByObject(self, self.instance)
7426 env["TIMEOUT"] = self.op.timeout
7429 def BuildHooksNodes(self):
7430 """Build hooks nodes.
7433 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7436 def CheckPrereq(self):
7437 """Check prerequisites.
7439 This checks that the instance is in the cluster.
7442 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7443 assert self.instance is not None, \
7444 "Cannot retrieve locked instance %s" % self.op.instance_name
7446 if not self.op.force:
7447 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7449 self.LogWarning("Ignoring offline instance check")
7451 self.primary_offline = \
7452 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7454 if self.primary_offline and self.op.ignore_offline_nodes:
7455 self.LogWarning("Ignoring offline primary node")
7457 _CheckNodeOnline(self, self.instance.primary_node)
7459 def Exec(self, feedback_fn):
7460 """Shutdown the instance.
7463 instance = self.instance
7464 node_current = instance.primary_node
7465 timeout = self.op.timeout
7467 # If the instance is offline we shouldn't mark it as down, as that
7468 # resets the offline flag.
7469 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7470 self.cfg.MarkInstanceDown(instance.name)
7472 if self.primary_offline:
7473 assert self.op.ignore_offline_nodes
7474 self.LogInfo("Primary node offline, marked instance as stopped")
7476 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7477 msg = result.fail_msg
7479 self.LogWarning("Could not shutdown instance: %s", msg)
7481 _ShutdownInstanceDisks(self, instance)
7484 class LUInstanceReinstall(LogicalUnit):
7485 """Reinstall an instance.
7488 HPATH = "instance-reinstall"
7489 HTYPE = constants.HTYPE_INSTANCE
7492 def ExpandNames(self):
7493 self._ExpandAndLockInstance()
7495 def BuildHooksEnv(self):
7498 This runs on master, primary and secondary nodes of the instance.
7501 return _BuildInstanceHookEnvByObject(self, self.instance)
7503 def BuildHooksNodes(self):
7504 """Build hooks nodes.
7507 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7510 def CheckPrereq(self):
7511 """Check prerequisites.
7513 This checks that the instance is in the cluster and is not running.
7516 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7517 assert instance is not None, \
7518 "Cannot retrieve locked instance %s" % self.op.instance_name
7519 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7520 " offline, cannot reinstall")
7522 if instance.disk_template == constants.DT_DISKLESS:
7523 raise errors.OpPrereqError("Instance '%s' has no disks" %
7524 self.op.instance_name,
7526 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7528 if self.op.os_type is not None:
7530 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7531 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7532 instance_os = self.op.os_type
7534 instance_os = instance.os
7536 nodelist = list(instance.all_nodes)
7538 if self.op.osparams:
7539 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7540 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7541 self.os_inst = i_osdict # the new dict (without defaults)
7545 self.instance = instance
7547 def Exec(self, feedback_fn):
7548 """Reinstall the instance.
7551 inst = self.instance
7553 if self.op.os_type is not None:
7554 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7555 inst.os = self.op.os_type
7556 # Write to configuration
7557 self.cfg.Update(inst, feedback_fn)
7559 _StartInstanceDisks(self, inst, None)
7561 feedback_fn("Running the instance OS create scripts...")
7562 # FIXME: pass debug option from opcode to backend
7563 result = self.rpc.call_instance_os_add(inst.primary_node,
7564 (inst, self.os_inst), True,
7565 self.op.debug_level)
7566 result.Raise("Could not install OS for instance %s on node %s" %
7567 (inst.name, inst.primary_node))
7569 _ShutdownInstanceDisks(self, inst)
7572 class LUInstanceRecreateDisks(LogicalUnit):
7573 """Recreate an instance's missing disks.
7576 HPATH = "instance-recreate-disks"
7577 HTYPE = constants.HTYPE_INSTANCE
7580 _MODIFYABLE = compat.UniqueFrozenset([
7581 constants.IDISK_SIZE,
7582 constants.IDISK_MODE,
7585 # New or changed disk parameters may have different semantics
7586 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7587 constants.IDISK_ADOPT,
7589 # TODO: Implement support changing VG while recreating
7591 constants.IDISK_METAVG,
7592 constants.IDISK_PROVIDER,
7595 def _RunAllocator(self):
7596 """Run the allocator based on input opcode.
7599 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7602 # The allocator should actually run in "relocate" mode, but current
7603 # allocators don't support relocating all the nodes of an instance at
7604 # the same time. As a workaround we use "allocate" mode, but this is
7605 # suboptimal for two reasons:
7606 # - The instance name passed to the allocator is present in the list of
7607 # existing instances, so there could be a conflict within the
7608 # internal structures of the allocator. This doesn't happen with the
7609 # current allocators, but it's a liability.
7610 # - The allocator counts the resources used by the instance twice: once
7611 # because the instance exists already, and once because it tries to
7612 # allocate a new instance.
7613 # The allocator could choose some of the nodes on which the instance is
7614 # running, but that's not a problem. If the instance nodes are broken,
7615 #   they should already be marked as drained or offline, and hence
7616 # skipped by the allocator. If instance disks have been lost for other
7617 # reasons, then recreating the disks on the same nodes should be fine.
7618 disk_template = self.instance.disk_template
7619 spindle_use = be_full[constants.BE_SPINDLE_USE]
7620 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7621 disk_template=disk_template,
7622 tags=list(self.instance.GetTags()),
7623 os=self.instance.os,
7625 vcpus=be_full[constants.BE_VCPUS],
7626 memory=be_full[constants.BE_MAXMEM],
7627 spindle_use=spindle_use,
7628 disks=[{constants.IDISK_SIZE: d.size,
7629 constants.IDISK_MODE: d.mode}
7630 for d in self.instance.disks],
7631 hypervisor=self.instance.hypervisor,
7632 node_whitelist=None)
7633 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7635 ial.Run(self.op.iallocator)
7637 assert req.RequiredNodes() == len(self.instance.all_nodes)
7640 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7641 " %s" % (self.op.iallocator, ial.info),
7644 self.op.nodes = ial.result
7645 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7646 self.op.instance_name, self.op.iallocator,
7647 utils.CommaJoin(ial.result))
7649 def CheckArguments(self):
7650 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7651 # Normalize and convert deprecated list of disk indices
7652 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
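# For example, a deprecated value like [2, 0] is normalized here to
# [(0, {}), (2, {})]: indices sorted and paired with empty parameter dicts.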
7654 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7656 raise errors.OpPrereqError("Some disks have been specified more than"
7657 " once: %s" % utils.CommaJoin(duplicates),
7660 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7661 # when neither iallocator nor nodes are specified
7662 if self.op.iallocator or self.op.nodes:
7663 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7665 for (idx, params) in self.op.disks:
7666 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7667 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7669 raise errors.OpPrereqError("Parameters for disk %s try to change"
7670                                    " unmodifiable parameter(s): %s" %
7671 (idx, utils.CommaJoin(unsupported)),
7674 def ExpandNames(self):
7675 self._ExpandAndLockInstance()
7676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7679 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7680 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7682 self.needed_locks[locking.LEVEL_NODE] = []
7683 if self.op.iallocator:
7684 # iallocator will select a new node in the same group
7685 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7686 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7688 self.needed_locks[locking.LEVEL_NODE_RES] = []
7690 def DeclareLocks(self, level):
7691 if level == locking.LEVEL_NODEGROUP:
7692 assert self.op.iallocator is not None
7693 assert not self.op.nodes
7694 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7695 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7696 # Lock the primary group used by the instance optimistically; this
7697 # requires going via the node before it's locked, requiring
7698 # verification later on
7699 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7700 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7702 elif level == locking.LEVEL_NODE:
7703 # If an allocator is used, then we lock all the nodes in the current
7704 # instance group, as we don't know yet which ones will be selected;
7705 # if we replace the nodes without using an allocator, locks are
7706 # already declared in ExpandNames; otherwise, we need to lock all the
7707 # instance nodes for disk re-creation
7708 if self.op.iallocator:
7709 assert not self.op.nodes
7710 assert not self.needed_locks[locking.LEVEL_NODE]
7711 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7713 # Lock member nodes of the group of the primary node
7714 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7715 self.needed_locks[locking.LEVEL_NODE].extend(
7716 self.cfg.GetNodeGroup(group_uuid).members)
7718 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7719 elif not self.op.nodes:
7720 self._LockInstancesNodes(primary_only=False)
7721 elif level == locking.LEVEL_NODE_RES:
7723 self.needed_locks[locking.LEVEL_NODE_RES] = \
7724 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7726 def BuildHooksEnv(self):
7729 This runs on master, primary and secondary nodes of the instance.
7732 return _BuildInstanceHookEnvByObject(self, self.instance)
7734 def BuildHooksNodes(self):
7735 """Build hooks nodes.
7738 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7741 def CheckPrereq(self):
7742 """Check prerequisites.
7744 This checks that the instance is in the cluster and is not running.
7747 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7748 assert instance is not None, \
7749 "Cannot retrieve locked instance %s" % self.op.instance_name
7751 if len(self.op.nodes) != len(instance.all_nodes):
7752 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7753 " %d replacement nodes were specified" %
7754 (instance.name, len(instance.all_nodes),
7755 len(self.op.nodes)),
7757 assert instance.disk_template != constants.DT_DRBD8 or \
7758 len(self.op.nodes) == 2
7759 assert instance.disk_template != constants.DT_PLAIN or \
7760 len(self.op.nodes) == 1
7761 primary_node = self.op.nodes[0]
7763 primary_node = instance.primary_node
7764 if not self.op.iallocator:
7765 _CheckNodeOnline(self, primary_node)
7767 if instance.disk_template == constants.DT_DISKLESS:
7768 raise errors.OpPrereqError("Instance '%s' has no disks" %
7769 self.op.instance_name, errors.ECODE_INVAL)
7771 # Verify if node group locks are still correct
7772 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7774 # Node group locks are acquired only for the primary node (and only
7775 # when the allocator is used)
7776 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7779 # if we replace nodes *and* the old primary is offline, we don't
7780 # check the instance state
7781 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7782 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7783 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7784 msg="cannot recreate disks")
7787 self.disks = dict(self.op.disks)
7789 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
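# When no disks are specified, every disk is recreated with unchanged
# parameters; e.g. a two-disk instance yields {0: {}, 1: {}}.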
7791 maxidx = max(self.disks.keys())
7792 if maxidx >= len(instance.disks):
7793 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7796 if ((self.op.nodes or self.op.iallocator) and
7797 sorted(self.disks.keys()) != range(len(instance.disks))):
7798 raise errors.OpPrereqError("Can't recreate disks partially and"
7799 " change the nodes at the same time",
7802 self.instance = instance
7804 if self.op.iallocator:
7805 self._RunAllocator()
7806 # Release unneeded node and node resource locks
7807 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7808 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7809 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7811 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7813 def Exec(self, feedback_fn):
7814 """Recreate the disks.
7817 instance = self.instance
7819 assert (self.owned_locks(locking.LEVEL_NODE) ==
7820 self.owned_locks(locking.LEVEL_NODE_RES))
7823 mods = [] # keeps track of needed changes
7825 for idx, disk in enumerate(instance.disks):
7827 changes = self.disks[idx]
7829 # Disk should not be recreated
7833 # update secondaries for disks, if needed
7834 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7835 # need to update the nodes and minors
7836 assert len(self.op.nodes) == 2
7837 assert len(disk.logical_id) == 6 # otherwise disk internals
7839 (_, _, old_port, _, _, old_secret) = disk.logical_id
7840 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7841 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7842 new_minors[0], new_minors[1], old_secret)
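# The DRBD logical_id is a 6-tuple (node_a, node_b, port, minor_a, minor_b,
# secret); only the nodes and minors are replaced here, the port and the
# shared secret are kept.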
7843 assert len(disk.logical_id) == len(new_id)
7847 mods.append((idx, new_id, changes))
7849 # now that we have passed all asserts above, we can apply the mods
7850 # in a single run (to avoid partial changes)
7851 for idx, new_id, changes in mods:
7852 disk = instance.disks[idx]
7853 if new_id is not None:
7854 assert disk.dev_type == constants.LD_DRBD8
7855 disk.logical_id = new_id
7857 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7858 mode=changes.get(constants.IDISK_MODE, None))
7860 # change primary node, if needed
7862 instance.primary_node = self.op.nodes[0]
7863 self.LogWarning("Changing the instance's nodes, you will have to"
7864 " remove any disks left on the older nodes manually")
7867 self.cfg.Update(instance, feedback_fn)
7869 # All touched nodes must be locked
7870 mylocks = self.owned_locks(locking.LEVEL_NODE)
7871 assert mylocks.issuperset(frozenset(instance.all_nodes))
7872 _CreateDisks(self, instance, to_skip=to_skip)
7875 class LUInstanceRename(LogicalUnit):
7876 """Rename an instance.
7879 HPATH = "instance-rename"
7880 HTYPE = constants.HTYPE_INSTANCE
7882 def CheckArguments(self):
7886 if self.op.ip_check and not self.op.name_check:
7887 # TODO: make the ip check more flexible and not depend on the name check
7888 raise errors.OpPrereqError("IP address check requires a name check",
7891 def BuildHooksEnv(self):
7894 This runs on master, primary and secondary nodes of the instance.
7897 env = _BuildInstanceHookEnvByObject(self, self.instance)
7898 env["INSTANCE_NEW_NAME"] = self.op.new_name
7901 def BuildHooksNodes(self):
7902 """Build hooks nodes.
7905 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7908 def CheckPrereq(self):
7909 """Check prerequisites.
7911 This checks that the instance is in the cluster and is not running.
7914 self.op.instance_name = _ExpandInstanceName(self.cfg,
7915 self.op.instance_name)
7916 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7917 assert instance is not None
7918 _CheckNodeOnline(self, instance.primary_node)
7919 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7920 msg="cannot rename")
7921 self.instance = instance
7923 new_name = self.op.new_name
7924 if self.op.name_check:
7925 hostname = _CheckHostnameSane(self, new_name)
7926 new_name = self.op.new_name = hostname.name
7927 if (self.op.ip_check and
7928 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7929 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7930 (hostname.ip, new_name),
7931 errors.ECODE_NOTUNIQUE)
7933 instance_list = self.cfg.GetInstanceList()
7934 if new_name in instance_list and new_name != instance.name:
7935 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7936 new_name, errors.ECODE_EXISTS)
7938 def Exec(self, feedback_fn):
7939 """Rename the instance.
7942 inst = self.instance
7943 old_name = inst.name
7945 rename_file_storage = False
7946 if (inst.disk_template in constants.DTS_FILEBASED and
7947 self.op.new_name != inst.name):
7948 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7949 rename_file_storage = True
7951 self.cfg.RenameInstance(inst.name, self.op.new_name)
7952 # Change the instance lock. This is definitely safe while we hold the BGL.
7953 # Otherwise the new lock would have to be added in acquired mode.
7955 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7956 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7957 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7959 # re-read the instance from the configuration after rename
7960 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7962 if rename_file_storage:
7963 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7964 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7965 old_file_storage_dir,
7966 new_file_storage_dir)
7967 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7968 " (but the instance has been renamed in Ganeti)" %
7969 (inst.primary_node, old_file_storage_dir,
7970 new_file_storage_dir))
7972 _StartInstanceDisks(self, inst, None)
7973 # update info on disks
7974 info = _GetInstanceInfoText(inst)
7975 for (idx, disk) in enumerate(inst.disks):
7976 for node in inst.all_nodes:
7977 self.cfg.SetDiskID(disk, node)
7978 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7980 self.LogWarning("Error setting info on node %s for disk %s: %s",
7981 node, idx, result.fail_msg)
7983 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7984 old_name, self.op.debug_level)
7985 msg = result.fail_msg
7987 msg = ("Could not run OS rename script for instance %s on node %s"
7988 " (but the instance has been renamed in Ganeti): %s" %
7989 (inst.name, inst.primary_node, msg))
7990 self.LogWarning(msg)
7992 _ShutdownInstanceDisks(self, inst)
7997 class LUInstanceRemove(LogicalUnit):
7998 """Remove an instance.
8001 HPATH = "instance-remove"
8002 HTYPE = constants.HTYPE_INSTANCE
8005 def ExpandNames(self):
8006 self._ExpandAndLockInstance()
8007 self.needed_locks[locking.LEVEL_NODE] = []
8008 self.needed_locks[locking.LEVEL_NODE_RES] = []
8009 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8011 def DeclareLocks(self, level):
8012 if level == locking.LEVEL_NODE:
8013 self._LockInstancesNodes()
8014 elif level == locking.LEVEL_NODE_RES:
8016 self.needed_locks[locking.LEVEL_NODE_RES] = \
8017 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8019 def BuildHooksEnv(self):
8022 This runs on master, primary and secondary nodes of the instance.
8025 env = _BuildInstanceHookEnvByObject(self, self.instance)
8026 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8029 def BuildHooksNodes(self):
8030 """Build hooks nodes.
8033 nl = [self.cfg.GetMasterNode()]
8034 nl_post = list(self.instance.all_nodes) + nl
8035 return (nl, nl_post)
8037 def CheckPrereq(self):
8038 """Check prerequisites.
8040 This checks that the instance is in the cluster.
8043 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8044 assert self.instance is not None, \
8045 "Cannot retrieve locked instance %s" % self.op.instance_name
8047 def Exec(self, feedback_fn):
8048 """Remove the instance.
8051 instance = self.instance
8052 logging.info("Shutting down instance %s on node %s",
8053 instance.name, instance.primary_node)
8055 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8056 self.op.shutdown_timeout)
8057 msg = result.fail_msg
8059 if self.op.ignore_failures:
8060 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8062 raise errors.OpExecError("Could not shutdown instance %s on"
8064 (instance.name, instance.primary_node, msg))
8066 assert (self.owned_locks(locking.LEVEL_NODE) ==
8067 self.owned_locks(locking.LEVEL_NODE_RES))
8068 assert not (set(instance.all_nodes) -
8069 self.owned_locks(locking.LEVEL_NODE)), \
8070 "Not owning correct locks"
8072 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8075 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8076 """Utility function to remove an instance.
8079 logging.info("Removing block devices for instance %s", instance.name)
8081 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8082 if not ignore_failures:
8083 raise errors.OpExecError("Can't remove instance's disks")
8084 feedback_fn("Warning: can't remove instance's disks")
8086 logging.info("Removing instance %s out of cluster config", instance.name)
8088 lu.cfg.RemoveInstance(instance.name)
8090 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8091 "Instance lock removal conflict"
8093 # Remove lock for the instance
8094 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8097 class LUInstanceQuery(NoHooksLU):
8098 """Logical unit for querying instances.
8101 # pylint: disable=W0142
8104 def CheckArguments(self):
8105 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8106 self.op.output_fields, self.op.use_locking)
8108 def ExpandNames(self):
8109 self.iq.ExpandNames(self)
8111 def DeclareLocks(self, level):
8112 self.iq.DeclareLocks(self, level)
8114 def Exec(self, feedback_fn):
8115 return self.iq.OldStyleQuery(self)
8118 def _ExpandNamesForMigration(lu):
8119 """Expands names for use with L{TLMigrateInstance}.
8121 @type lu: L{LogicalUnit}
8124 if lu.op.target_node is not None:
8125 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8127 lu.needed_locks[locking.LEVEL_NODE] = []
8128 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8130 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8131 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8133 # The node allocation lock is actually only needed for replicated instances
8134 # (e.g. DRBD8) and if an iallocator is used.
8135 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8138 def _DeclareLocksForMigration(lu, level):
8139 """Declares locks for L{TLMigrateInstance}.
8141 @type lu: L{LogicalUnit}
8142 @param level: Lock level
8145 if level == locking.LEVEL_NODE_ALLOC:
8146 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8148 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8150 # Node locks are already declared here rather than at LEVEL_NODE as we need
8151 # the instance object anyway to declare the node allocation lock.
8152 if instance.disk_template in constants.DTS_EXT_MIRROR:
8153 if lu.op.target_node is None:
8154 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8155 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8157 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8159 del lu.recalculate_locks[locking.LEVEL_NODE]
8161 lu._LockInstancesNodes() # pylint: disable=W0212
8163 elif level == locking.LEVEL_NODE:
8164 # Node locks are declared together with the node allocation lock
8165 assert (lu.needed_locks[locking.LEVEL_NODE] or
8166 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8168 elif level == locking.LEVEL_NODE_RES:
8170 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8171 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8174 class LUInstanceFailover(LogicalUnit):
8175 """Failover an instance.
8178 HPATH = "instance-failover"
8179 HTYPE = constants.HTYPE_INSTANCE
8182 def CheckArguments(self):
8183 """Check the arguments.
8186 self.iallocator = getattr(self.op, "iallocator", None)
8187 self.target_node = getattr(self.op, "target_node", None)
8189 def ExpandNames(self):
8190 self._ExpandAndLockInstance()
8191 _ExpandNamesForMigration(self)
8194 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8195 self.op.ignore_consistency, True,
8196 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8198 self.tasklets = [self._migrater]
8200 def DeclareLocks(self, level):
8201 _DeclareLocksForMigration(self, level)
8203 def BuildHooksEnv(self):
8206 This runs on master, primary and secondary nodes of the instance.
8209 instance = self._migrater.instance
8210 source_node = instance.primary_node
8211 target_node = self.op.target_node
8213 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8214 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8215 "OLD_PRIMARY": source_node,
8216 "NEW_PRIMARY": target_node,
8219 if instance.disk_template in constants.DTS_INT_MIRROR:
8220 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8221 env["NEW_SECONDARY"] = source_node
8223 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8225 env.update(_BuildInstanceHookEnvByObject(self, instance))
8229 def BuildHooksNodes(self):
8230 """Build hooks nodes.
8233 instance = self._migrater.instance
8234 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8235 return (nl, nl + [instance.primary_node])
8238 class LUInstanceMigrate(LogicalUnit):
8239 """Migrate an instance.
8241 This is migration without shutting down, compared to the failover,
8242 which is done with shutdown.
8245 HPATH = "instance-migrate"
8246 HTYPE = constants.HTYPE_INSTANCE
8249 def ExpandNames(self):
8250 self._ExpandAndLockInstance()
8251 _ExpandNamesForMigration(self)
8254 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8255 False, self.op.allow_failover, False,
8256 self.op.allow_runtime_changes,
8257 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8258 self.op.ignore_ipolicy)
8260 self.tasklets = [self._migrater]
8262 def DeclareLocks(self, level):
8263 _DeclareLocksForMigration(self, level)
8265 def BuildHooksEnv(self):
8268 This runs on master, primary and secondary nodes of the instance.
8271 instance = self._migrater.instance
8272 source_node = instance.primary_node
8273 target_node = self.op.target_node
8274 env = _BuildInstanceHookEnvByObject(self, instance)
8276 "MIGRATE_LIVE": self._migrater.live,
8277 "MIGRATE_CLEANUP": self.op.cleanup,
8278 "OLD_PRIMARY": source_node,
8279 "NEW_PRIMARY": target_node,
8280 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8283 if instance.disk_template in constants.DTS_INT_MIRROR:
8284 env["OLD_SECONDARY"] = target_node
8285 env["NEW_SECONDARY"] = source_node
8287 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8291 def BuildHooksNodes(self):
8292 """Build hooks nodes.
8295 instance = self._migrater.instance
8296 snodes = list(instance.secondary_nodes)
8297 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8301 class LUInstanceMove(LogicalUnit):
8302 """Move an instance by data-copying.
8305 HPATH = "instance-move"
8306 HTYPE = constants.HTYPE_INSTANCE
8309 def ExpandNames(self):
8310 self._ExpandAndLockInstance()
8311 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8312 self.op.target_node = target_node
8313 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8314 self.needed_locks[locking.LEVEL_NODE_RES] = []
8315 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8317 def DeclareLocks(self, level):
8318 if level == locking.LEVEL_NODE:
8319 self._LockInstancesNodes(primary_only=True)
8320 elif level == locking.LEVEL_NODE_RES:
8322 self.needed_locks[locking.LEVEL_NODE_RES] = \
8323 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8325 def BuildHooksEnv(self):
8328 This runs on master, primary and secondary nodes of the instance.
8332 "TARGET_NODE": self.op.target_node,
8333 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8338 def BuildHooksNodes(self):
8339 """Build hooks nodes.
8343 self.cfg.GetMasterNode(),
8344 self.instance.primary_node,
8345 self.op.target_node,
8349 def CheckPrereq(self):
8350 """Check prerequisites.
8352 This checks that the instance is in the cluster.
8355 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8356 assert self.instance is not None, \
8357 "Cannot retrieve locked instance %s" % self.op.instance_name
8359 node = self.cfg.GetNodeInfo(self.op.target_node)
8360 assert node is not None, \
8361 "Cannot retrieve locked node %s" % self.op.target_node
8363 self.target_node = target_node = node.name
8365 if target_node == instance.primary_node:
8366 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8367 (instance.name, target_node),
8370 bep = self.cfg.GetClusterInfo().FillBE(instance)
8372 for idx, dsk in enumerate(instance.disks):
8373 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8374 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8375 " cannot copy" % idx, errors.ECODE_STATE)
8377 _CheckNodeOnline(self, target_node)
8378 _CheckNodeNotDrained(self, target_node)
8379 _CheckNodeVmCapable(self, target_node)
8380 cluster = self.cfg.GetClusterInfo()
8381 group_info = self.cfg.GetNodeGroup(node.group)
8382 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8383 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8384 ignore=self.op.ignore_ipolicy)
8386 if instance.admin_state == constants.ADMINST_UP:
8387       # check memory requirements on the target node
8388 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8389 instance.name, bep[constants.BE_MAXMEM],
8390 instance.hypervisor)
8392 self.LogInfo("Not checking memory on the secondary node as"
8393 " instance will not be started")
8395     # check bridge existence
8396 _CheckInstanceBridgesExist(self, instance, node=target_node)
8398 def Exec(self, feedback_fn):
8399 """Move an instance.
8401 The move is done by shutting it down on its present node, copying
8402 the data over (slow) and starting it on the new node.
8405 instance = self.instance
8407 source_node = instance.primary_node
8408 target_node = self.target_node
8410 self.LogInfo("Shutting down instance %s on source node %s",
8411 instance.name, source_node)
8413 assert (self.owned_locks(locking.LEVEL_NODE) ==
8414 self.owned_locks(locking.LEVEL_NODE_RES))
8416 result = self.rpc.call_instance_shutdown(source_node, instance,
8417 self.op.shutdown_timeout)
8418 msg = result.fail_msg
8420 if self.op.ignore_consistency:
8421 self.LogWarning("Could not shutdown instance %s on node %s."
8422 " Proceeding anyway. Please make sure node"
8423 " %s is down. Error details: %s",
8424 instance.name, source_node, source_node, msg)
8426 raise errors.OpExecError("Could not shutdown instance %s on"
8428 (instance.name, source_node, msg))
8430 # create the target disks
8432 _CreateDisks(self, instance, target_node=target_node)
8433 except errors.OpExecError:
8434 self.LogWarning("Device creation failed, reverting...")
8436 _RemoveDisks(self, instance, target_node=target_node)
8438 self.cfg.ReleaseDRBDMinors(instance.name)
8441 cluster_name = self.cfg.GetClusterInfo().cluster_name
8444 # activate, get path, copy the data over
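# For each disk: assemble it on the target node to get a device path, then
# have the source node export (copy) the data to that path; failures are
# collected in errs and roll the operation back further down.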
8445 for idx, disk in enumerate(instance.disks):
8446 self.LogInfo("Copying data for disk %d", idx)
8447 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8448 instance.name, True, idx)
8450 self.LogWarning("Can't assemble newly created disk %d: %s",
8451 idx, result.fail_msg)
8452 errs.append(result.fail_msg)
8454 dev_path = result.payload
8455 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8456 target_node, dev_path,
8459 self.LogWarning("Can't copy data over for disk %d: %s",
8460 idx, result.fail_msg)
8461 errs.append(result.fail_msg)
8465 self.LogWarning("Some disks failed to copy, aborting")
8467 _RemoveDisks(self, instance, target_node=target_node)
8469 self.cfg.ReleaseDRBDMinors(instance.name)
8470 raise errors.OpExecError("Errors during disk copy: %s" %
8473 instance.primary_node = target_node
8474 self.cfg.Update(instance, feedback_fn)
8476 self.LogInfo("Removing the disks on the original node")
8477 _RemoveDisks(self, instance, target_node=source_node)
8479 # Only start the instance if it's marked as up
8480 if instance.admin_state == constants.ADMINST_UP:
8481 self.LogInfo("Starting instance %s on node %s",
8482 instance.name, target_node)
8484 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8485 ignore_secondaries=True)
8487 _ShutdownInstanceDisks(self, instance)
8488 raise errors.OpExecError("Can't activate the instance's disks")
8490 result = self.rpc.call_instance_start(target_node,
8491 (instance, None, None), False)
8492 msg = result.fail_msg
8494 _ShutdownInstanceDisks(self, instance)
8495 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8496 (instance.name, target_node, msg))
8499 class LUNodeMigrate(LogicalUnit):
8500 """Migrate all instances from a node.
8503 HPATH = "node-migrate"
8504 HTYPE = constants.HTYPE_NODE
8507 def CheckArguments(self):
8510 def ExpandNames(self):
8511 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8513 self.share_locks = _ShareAll()
8514 self.needed_locks = {
8515 locking.LEVEL_NODE: [self.op.node_name],
8518 def BuildHooksEnv(self):
8521 This runs on the master, the primary and all the secondaries.
8525 "NODE_NAME": self.op.node_name,
8526 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8529 def BuildHooksNodes(self):
8530 """Build hooks nodes.
8533 nl = [self.cfg.GetMasterNode()]
8536 def CheckPrereq(self):
8539 def Exec(self, feedback_fn):
8540     # Prepare jobs for migrating instances
8541 allow_runtime_changes = self.op.allow_runtime_changes
8543 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8546 iallocator=self.op.iallocator,
8547 target_node=self.op.target_node,
8548 allow_runtime_changes=allow_runtime_changes,
8549 ignore_ipolicy=self.op.ignore_ipolicy)]
8550 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
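# One single-opcode job per primary instance on the node, so the individual
# migrations are submitted and can succeed or fail independently.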
8552 # TODO: Run iallocator in this opcode and pass correct placement options to
8553 # OpInstanceMigrate. Since other jobs can modify the cluster between
8554 # running the iallocator and the actual migration, a good consistency model
8555 # will have to be found.
8557 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8558 frozenset([self.op.node_name]))
8560 return ResultWithJobs(jobs)
8563 class TLMigrateInstance(Tasklet):
8564 """Tasklet class for instance migration.
8567 @ivar live: whether the migration will be done live or non-live;
8568       this variable is initialized only after CheckPrereq has run
8569 @type cleanup: boolean
8570   @ivar cleanup: Whether we clean up from a failed migration
8571 @type iallocator: string
8572 @ivar iallocator: The iallocator used to determine target_node
8573 @type target_node: string
8574 @ivar target_node: If given, the target_node to reallocate the instance to
8575 @type failover: boolean
8576 @ivar failover: Whether operation results in failover or migration
8577 @type fallback: boolean
8578 @ivar fallback: Whether fallback to failover is allowed if migration not
8580 @type ignore_consistency: boolean
8581   @ivar ignore_consistency: Whether we should ignore consistency between source
8583 @type shutdown_timeout: int
8584   @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8585 @type ignore_ipolicy: bool
8586 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8591 _MIGRATION_POLL_INTERVAL = 1 # seconds
8592 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
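# These intervals drive the polling loop in _ExecMigration: the migration
# status is queried every _MIGRATION_POLL_INTERVAL seconds and progress is
# reported at most every _MIGRATION_FEEDBACK_INTERVAL seconds.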
8594 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8595 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8597 """Initializes this class.
8600 Tasklet.__init__(self, lu)
8603 self.instance_name = instance_name
8604 self.cleanup = cleanup
8605 self.live = False # will be overridden later
8606 self.failover = failover
8607 self.fallback = fallback
8608 self.ignore_consistency = ignore_consistency
8609 self.shutdown_timeout = shutdown_timeout
8610 self.ignore_ipolicy = ignore_ipolicy
8611 self.allow_runtime_changes = allow_runtime_changes
8613 def CheckPrereq(self):
8614 """Check prerequisites.
8616 This checks that the instance is in the cluster.
8619 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8620 instance = self.cfg.GetInstanceInfo(instance_name)
8621 assert instance is not None
8622 self.instance = instance
8623 cluster = self.cfg.GetClusterInfo()
8625 if (not self.cleanup and
8626 not instance.admin_state == constants.ADMINST_UP and
8627 not self.failover and self.fallback):
8628 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8629 " switching to failover")
8630 self.failover = True
8632 if instance.disk_template not in constants.DTS_MIRRORED:
8637 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8638 " %s" % (instance.disk_template, text),
8641 if instance.disk_template in constants.DTS_EXT_MIRROR:
8642 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8644 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8646 if self.lu.op.iallocator:
8647 self._RunAllocator()
8649       # We set self.target_node as it is required by
8651 self.target_node = self.lu.op.target_node
8653 # Check that the target node is correct in terms of instance policy
8654 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8655 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8656 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8658 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8659 ignore=self.ignore_ipolicy)
8661 # self.target_node is already populated, either directly or by the
8663 target_node = self.target_node
8664 if self.target_node == instance.primary_node:
8665 raise errors.OpPrereqError("Cannot migrate instance %s"
8666 " to its primary (%s)" %
8667 (instance.name, instance.primary_node),
8670 if len(self.lu.tasklets) == 1:
8671 # It is safe to release locks only when we're the only tasklet
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8674 keep=[instance.primary_node, self.target_node])
8675 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8678 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8680 secondary_nodes = instance.secondary_nodes
8681 if not secondary_nodes:
8682 raise errors.ConfigurationError("No secondary node but using"
8683 " %s disk template" %
8684 instance.disk_template)
8685 target_node = secondary_nodes[0]
8686 if self.lu.op.iallocator or (self.lu.op.target_node and
8687 self.lu.op.target_node != target_node):
8689 text = "failed over"
8692 raise errors.OpPrereqError("Instances with disk template %s cannot"
8693 " be %s to arbitrary nodes"
8694 " (neither an iallocator nor a target"
8695 " node can be passed)" %
8696 (instance.disk_template, text),
8698 nodeinfo = self.cfg.GetNodeInfo(target_node)
8699 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8700 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8702 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8703 ignore=self.ignore_ipolicy)
8705 i_be = cluster.FillBE(instance)
8707 # check memory requirements on the secondary node
8708 if (not self.cleanup and
8709 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8710 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8711 "migrating instance %s" %
8713 i_be[constants.BE_MINMEM],
8714 instance.hypervisor)
8716 self.lu.LogInfo("Not checking memory on the secondary node as"
8717 " instance will not be started")
8719 # check if failover must be forced instead of migration
8720 if (not self.cleanup and not self.failover and
8721 i_be[constants.BE_ALWAYS_FAILOVER]):
8722 self.lu.LogInfo("Instance configured to always failover; fallback"
8724 self.failover = True
8726     # check bridge existence
8727 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8729 if not self.cleanup:
8730 _CheckNodeNotDrained(self.lu, target_node)
8731 if not self.failover:
8732 result = self.rpc.call_instance_migratable(instance.primary_node,
8734 if result.fail_msg and self.fallback:
8735 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8737 self.failover = True
8739 result.Raise("Can't migrate, please use failover",
8740 prereq=True, ecode=errors.ECODE_STATE)
8742 assert not (self.failover and self.cleanup)
8744 if not self.failover:
8745 if self.lu.op.live is not None and self.lu.op.mode is not None:
8746 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8747 " parameters are accepted",
8749 if self.lu.op.live is not None:
8751 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8753 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8754 # reset the 'live' parameter to None so that repeated
8755 # invocations of CheckPrereq do not raise an exception
8756 self.lu.op.live = None
8757 elif self.lu.op.mode is None:
8758 # read the default value from the hypervisor
8759 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8760 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8762 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8764 # Failover is never live
8767 if not (self.failover or self.cleanup):
8768 remote_info = self.rpc.call_instance_info(instance.primary_node,
8770 instance.hypervisor)
8771 remote_info.Raise("Error checking instance on node %s" %
8772 instance.primary_node)
8773 instance_running = bool(remote_info.payload)
8774 if instance_running:
8775 self.current_mem = int(remote_info.payload["memory"])
8777 def _RunAllocator(self):
8778 """Run the allocator based on input opcode.
8781 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8783 # FIXME: add a self.ignore_ipolicy option
8784 req = iallocator.IAReqRelocate(name=self.instance_name,
8785 relocate_from=[self.instance.primary_node])
8786 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8788 ial.Run(self.lu.op.iallocator)
8791 raise errors.OpPrereqError("Can't compute nodes using"
8792 " iallocator '%s': %s" %
8793 (self.lu.op.iallocator, ial.info),
8795 self.target_node = ial.result[0]
8796 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8797 self.instance_name, self.lu.op.iallocator,
8798 utils.CommaJoin(ial.result))
8800 def _WaitUntilSync(self):
8801 """Poll with custom rpc for disk sync.
8803 This uses our own step-based rpc call.
8806 self.feedback_fn("* wait until resync is done")
8810 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8812 (self.instance.disks,
8815 for node, nres in result.items():
8816 nres.Raise("Cannot resync disks on node %s" % node)
8817 node_done, node_percent = nres.payload
8818 all_done = all_done and node_done
8819 if node_percent is not None:
8820 min_percent = min(min_percent, node_percent)
8822 if min_percent < 100:
8823 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8826 def _EnsureSecondary(self, node):
8827 """Demote a node to secondary.
8830 self.feedback_fn("* switching node %s to secondary mode" % node)
8832 for dev in self.instance.disks:
8833 self.cfg.SetDiskID(dev, node)
8835 result = self.rpc.call_blockdev_close(node, self.instance.name,
8836 self.instance.disks)
8837 result.Raise("Cannot change disk to secondary on node %s" % node)
8839 def _GoStandalone(self):
8840 """Disconnect from the network.
8843 self.feedback_fn("* changing into standalone mode")
8844 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8845 self.instance.disks)
8846 for node, nres in result.items():
8847 nres.Raise("Cannot disconnect disks node %s" % node)
8849 def _GoReconnect(self, multimaster):
8850 """Reconnect to the network.
8856 msg = "single-master"
8857 self.feedback_fn("* changing disks into %s mode" % msg)
8858 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8859 (self.instance.disks, self.instance),
8860 self.instance.name, multimaster)
8861 for node, nres in result.items():
8862 nres.Raise("Cannot change disks config on node %s" % node)
8864 def _ExecCleanup(self):
8865 """Try to cleanup after a failed migration.
8867 The cleanup is done by:
8868 - check that the instance is running only on one node
8869 (and update the config if needed)
8870 - change disks on its secondary node to secondary
8871 - wait until disks are fully synchronized
8872 - disconnect from the network
8873 - change disks into single-master mode
8874 - wait again until disks are fully synchronized
8877 instance = self.instance
8878 target_node = self.target_node
8879 source_node = self.source_node
8881 # check running on only one node
8882 self.feedback_fn("* checking where the instance actually runs"
8883 " (if this hangs, the hypervisor might be in"
8885 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8886 for node, result in ins_l.items():
8887 result.Raise("Can't contact node %s" % node)
8889 runningon_source = instance.name in ins_l[source_node].payload
8890 runningon_target = instance.name in ins_l[target_node].payload
8892 if runningon_source and runningon_target:
8893 raise errors.OpExecError("Instance seems to be running on two nodes,"
8894 " or the hypervisor is confused; you will have"
8895 " to ensure manually that it runs only on one"
8896 " and restart this operation")
8898 if not (runningon_source or runningon_target):
8899 raise errors.OpExecError("Instance does not seem to be running at all;"
8900 " in this case it's safer to repair by"
8901 " running 'gnt-instance stop' to ensure disk"
8902 " shutdown, and then restarting it")
8904 if runningon_target:
8905 # the migration has actually succeeded, we need to update the config
8906 self.feedback_fn("* instance running on secondary node (%s),"
8907 " updating config" % target_node)
8908 instance.primary_node = target_node
8909 self.cfg.Update(instance, self.feedback_fn)
8910 demoted_node = source_node
8912 self.feedback_fn("* instance confirmed to be running on its"
8913 " primary node (%s)" % source_node)
8914 demoted_node = target_node
8916 if instance.disk_template in constants.DTS_INT_MIRROR:
8917 self._EnsureSecondary(demoted_node)
8919 self._WaitUntilSync()
8920 except errors.OpExecError:
8921         # we ignore errors here, since if the device is standalone, it
8922 # won't be able to sync
8924 self._GoStandalone()
8925 self._GoReconnect(False)
8926 self._WaitUntilSync()
8928 self.feedback_fn("* done")
8930 def _RevertDiskStatus(self):
8931 """Try to revert the disk status after a failed migration.
8934 target_node = self.target_node
8935 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8939 self._EnsureSecondary(target_node)
8940 self._GoStandalone()
8941 self._GoReconnect(False)
8942 self._WaitUntilSync()
8943 except errors.OpExecError, err:
8944 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8945 " please try to recover the instance manually;"
8946 " error '%s'" % str(err))
8948 def _AbortMigration(self):
8949 """Call the hypervisor code to abort a started migration.
8952 instance = self.instance
8953 target_node = self.target_node
8954 source_node = self.source_node
8955 migration_info = self.migration_info
8957 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8961 abort_msg = abort_result.fail_msg
8963 logging.error("Aborting migration failed on target node %s: %s",
8964 target_node, abort_msg)
8965       # Don't raise an exception here, as we still have to try to revert the
8966 # disk status, even if this step failed.
8968 abort_result = self.rpc.call_instance_finalize_migration_src(
8969 source_node, instance, False, self.live)
8970 abort_msg = abort_result.fail_msg
8972 logging.error("Aborting migration failed on source node %s: %s",
8973 source_node, abort_msg)
8975 def _ExecMigration(self):
8976 """Migrate an instance.
8978 The migrate is done by:
8979 - change the disks into dual-master mode
8980 - wait until disks are fully synchronized again
8981 - migrate the instance
8982 - change disks on the new secondary node (the old primary) to secondary
8983 - wait until disks are fully synchronized
8984 - change disks into single-master mode
8987 instance = self.instance
8988 target_node = self.target_node
8989 source_node = self.source_node
8991 # Check for hypervisor version mismatch and warn the user.
8992 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8993 None, [self.instance.hypervisor], False)
8994 for ninfo in nodeinfo.values():
8995 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8997 (_, _, (src_info, )) = nodeinfo[source_node].payload
8998 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9000 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9001 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9002 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9003 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9004 if src_version != dst_version:
9005 self.feedback_fn("* warning: hypervisor version mismatch between"
9006 " source (%s) and target (%s) node" %
9007 (src_version, dst_version))
9009 self.feedback_fn("* checking disk consistency between source and target")
9010 for (idx, dev) in enumerate(instance.disks):
9011 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9012 raise errors.OpExecError("Disk %s is degraded or not fully"
9013 " synchronized on target node,"
9014 " aborting migration" % idx)
9016 if self.current_mem > self.tgt_free_mem:
9017 if not self.allow_runtime_changes:
9018 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9019 " free memory to fit instance %s on target"
9020 " node %s (have %dMB, need %dMB)" %
9021 (instance.name, target_node,
9022 self.tgt_free_mem, self.current_mem))
9023 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9024 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9027 rpcres.Raise("Cannot modify instance runtime memory")
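# E.g. an instance currently using 4096 MB with only 2048 MB free on the
# target is ballooned down to 2048 MB before the transfer (provided runtime
# changes are allowed; otherwise the OpExecError above is raised).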
9029 # First get the migration information from the remote node
9030 result = self.rpc.call_migration_info(source_node, instance)
9031 msg = result.fail_msg
9033 log_err = ("Failed fetching source migration information from %s: %s" %
9035 logging.error(log_err)
9036 raise errors.OpExecError(log_err)
9038 self.migration_info = migration_info = result.payload
9040 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9041 # Then switch the disks to master/master mode
9042 self._EnsureSecondary(target_node)
9043 self._GoStandalone()
9044 self._GoReconnect(True)
9045 self._WaitUntilSync()
9047 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9048 result = self.rpc.call_accept_instance(target_node,
9051 self.nodes_ip[target_node])
9053 msg = result.fail_msg
9055 logging.error("Instance pre-migration failed, trying to revert"
9056 " disk status: %s", msg)
9057 self.feedback_fn("Pre-migration failed, aborting")
9058 self._AbortMigration()
9059 self._RevertDiskStatus()
9060 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9061 (instance.name, msg))
9063 self.feedback_fn("* migrating instance to %s" % target_node)
9064 result = self.rpc.call_instance_migrate(source_node, instance,
9065 self.nodes_ip[target_node],
9067 msg = result.fail_msg
9069 logging.error("Instance migration failed, trying to revert"
9070 " disk status: %s", msg)
9071 self.feedback_fn("Migration failed, aborting")
9072 self._AbortMigration()
9073 self._RevertDiskStatus()
9074 raise errors.OpExecError("Could not migrate instance %s: %s" %
9075 (instance.name, msg))
9077 self.feedback_fn("* starting memory transfer")
9078 last_feedback = time.time()
9080 result = self.rpc.call_instance_get_migration_status(source_node,
9082 msg = result.fail_msg
9083 ms = result.payload # MigrationStatus instance
9084 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9085 logging.error("Instance migration failed, trying to revert"
9086 " disk status: %s", msg)
9087 self.feedback_fn("Migration failed, aborting")
9088 self._AbortMigration()
9089 self._RevertDiskStatus()
9091 msg = "hypervisor returned failure"
9092 raise errors.OpExecError("Could not migrate instance %s: %s" %
9093 (instance.name, msg))
9095 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9096 self.feedback_fn("* memory transfer complete")
9099 if (utils.TimeoutExpired(last_feedback,
9100 self._MIGRATION_FEEDBACK_INTERVAL) and
9101 ms.transferred_ram is not None):
9102 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9103 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9104 last_feedback = time.time()
9106 time.sleep(self._MIGRATION_POLL_INTERVAL)
9108 result = self.rpc.call_instance_finalize_migration_src(source_node,
9112 msg = result.fail_msg
9114 logging.error("Instance migration succeeded, but finalization failed"
9115 " on the source node: %s", msg)
9116 raise errors.OpExecError("Could not finalize instance migration: %s" %
9119 instance.primary_node = target_node
9121 # distribute new instance config to the other nodes
9122 self.cfg.Update(instance, self.feedback_fn)
9124 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9128 msg = result.fail_msg
9130 logging.error("Instance migration succeeded, but finalization failed"
9131 " on the target node: %s", msg)
9132 raise errors.OpExecError("Could not finalize instance migration: %s" %
9135 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9136 self._EnsureSecondary(source_node)
9137 self._WaitUntilSync()
9138 self._GoStandalone()
9139 self._GoReconnect(False)
9140 self._WaitUntilSync()
9142 # If the instance's disk template is `rbd' or `ext' and there was a
9143 # successful migration, unmap the device from the source node.
9144 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9145 disks = _ExpandCheckDisks(instance, instance.disks)
9146 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9148 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9149 msg = result.fail_msg
9151 logging.error("Migration was successful, but couldn't unmap the"
9152 " block device %s on source node %s: %s",
9153 disk.iv_name, source_node, msg)
9154 logging.error("You need to unmap the device %s manually on %s",
9155 disk.iv_name, source_node)
9157 self.feedback_fn("* done")
9159 def _ExecFailover(self):
9160 """Failover an instance.
9162 The failover is done by shutting it down on its present node and
9163 starting it on the secondary.
9166 instance = self.instance
9167 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9169 source_node = instance.primary_node
9170 target_node = self.target_node
9172 if instance.admin_state == constants.ADMINST_UP:
9173 self.feedback_fn("* checking disk consistency between source and target")
9174 for (idx, dev) in enumerate(instance.disks):
9175 # for drbd, these are drbd over lvm
9176 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9178 if primary_node.offline:
9179 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9181 (primary_node.name, idx, target_node))
9182 elif not self.ignore_consistency:
9183 raise errors.OpExecError("Disk %s is degraded on target node,"
9184 " aborting failover" % idx)
9186 self.feedback_fn("* not checking disk consistency as instance is not"
9189 self.feedback_fn("* shutting down instance on source node")
9190 logging.info("Shutting down instance %s on node %s",
9191 instance.name, source_node)
9193 result = self.rpc.call_instance_shutdown(source_node, instance,
9194 self.shutdown_timeout)
9195 msg = result.fail_msg
9197 if self.ignore_consistency or primary_node.offline:
9198 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9199 " proceeding anyway; please make sure node"
9200 " %s is down; error details: %s",
9201 instance.name, source_node, source_node, msg)
9203 raise errors.OpExecError("Could not shutdown instance %s on"
9205 (instance.name, source_node, msg))
9207 self.feedback_fn("* deactivating the instance's disks on source node")
9208 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9209 raise errors.OpExecError("Can't shut down the instance's disks")
9211 instance.primary_node = target_node
9212 # distribute new instance config to the other nodes
9213 self.cfg.Update(instance, self.feedback_fn)
9215 # Only start the instance if it's marked as up
9216 if instance.admin_state == constants.ADMINST_UP:
9217 self.feedback_fn("* activating the instance's disks on target node %s" %
9219 logging.info("Starting instance %s on node %s",
9220 instance.name, target_node)
9222 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9223 ignore_secondaries=True)
9225 _ShutdownInstanceDisks(self.lu, instance)
9226 raise errors.OpExecError("Can't activate the instance's disks")
9228 self.feedback_fn("* starting the instance on the target node %s" %
9230 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9232 msg = result.fail_msg
9234 _ShutdownInstanceDisks(self.lu, instance)
9235 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9236 (instance.name, target_node, msg))
9238 def Exec(self, feedback_fn):
9239 """Perform the migration.
9242 self.feedback_fn = feedback_fn
9243 self.source_node = self.instance.primary_node
9245 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9246 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9247 self.target_node = self.instance.secondary_nodes[0]
9248 # Otherwise self.target_node has been populated either
9249 # directly, or through an iallocator.
9251 self.all_nodes = [self.source_node, self.target_node]
9252 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9253 in self.cfg.GetMultiNodeInfo(self.all_nodes))
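# nodes_ip maps node names to their secondary (replication network) IPs;
# the DRBD and migration RPCs issued by this tasklet use these addresses
# instead of the primary ones.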
9256 feedback_fn("Failover instance %s" % self.instance.name)
9257 self._ExecFailover()
9259 feedback_fn("Migrating instance %s" % self.instance.name)
9262 return self._ExecCleanup()
9264 return self._ExecMigration()
9267 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9269 """Wrapper around L{_CreateBlockDevInner}.
9271 This method annotates the root device first.
9274 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9275 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9276 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9277 force_open, excl_stor)
9280 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9281 info, force_open, excl_stor):
9282 """Create a tree of block devices on a given node.
9284 If this device type has to be created on secondaries, create it and all of its children.
9287 If not, just recurse to children keeping the same 'force' value.
9289 @attention: The device has to be annotated already.
9291 @param lu: the lu on whose behalf we execute
9292 @param node: the node on which to create the device
9293 @type instance: L{objects.Instance}
9294 @param instance: the instance which owns the device
9295 @type device: L{objects.Disk}
9296 @param device: the device to create
9297 @type force_create: boolean
9298 @param force_create: whether to force creation of this device; this
9299 will be changed to True whenever we find a device which has
9300 the CreateOnSecondary() attribute
9301 @param info: the extra 'metadata' we should attach to the device
9302 (this will be represented as a LVM tag)
9303 @type force_open: boolean
9304 @param force_open: this parameter will be passed to the
9305 L{backend.BlockdevCreate} function where it specifies
9306 whether we run on primary or not, and it affects both
9307 the child assembly and the device's own Open() execution
9308 @type excl_stor: boolean
9309 @param excl_stor: Whether exclusive_storage is active for the node
9312 if device.CreateOnSecondary():
9316 for child in device.children:
9317 _CreateBlockDevInner(lu, node, instance, child, force_create,
9318 info, force_open, excl_stor)
9320 if not force_create:
9323 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9327 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9329 """Create a single block device on a given node.
9331 This will not recurse over children of the device, so they must be created in advance.
9334 @param lu: the lu on whose behalf we execute
9335 @param node: the node on which to create the device
9336 @type instance: L{objects.Instance}
9337 @param instance: the instance which owns the device
9338 @type device: L{objects.Disk}
9339 @param device: the device to create
9340 @param info: the extra 'metadata' we should attach to the device
9341 (this will be represented as a LVM tag)
9342 @type force_open: boolean
9343 @param force_open: this parameter will be passed to the
9344 L{backend.BlockdevCreate} function where it specifies
9345 whether we run on primary or not, and it affects both
9346 the child assembly and the device's own Open() execution
9347 @type excl_stor: boolean
9348 @param excl_stor: Whether exclusive_storage is active for the node
9351 lu.cfg.SetDiskID(device, node)
9352 result = lu.rpc.call_blockdev_create(node, device, device.size,
9353 instance.name, force_open, info,
9355 result.Raise("Can't create block device %s on"
9356 " node %s for instance %s" % (device, node, instance.name))
9357 if device.physical_id is None:
9358 device.physical_id = result.payload
9361 def _GenerateUniqueNames(lu, exts):
9362 """Generate a suitable LV name.
9364 This will generate logical volume names for the given instance.
9369 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9370 results.append("%s%s" % (new_id, val))
9374 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9375 iv_name, p_minor, s_minor):
9376 """Generate a drbd8 device complete with its children.
9379 assert len(vgnames) == len(names) == 2
9380 port = lu.cfg.AllocatePort()
9381 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9383 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9384 logical_id=(vgnames[0], names[0]),
9386 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9387 size=constants.DRBD_META_SIZE,
9388 logical_id=(vgnames[1], names[1]),
9390 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9391 logical_id=(primary, secondary, port,
9394 children=[dev_data, dev_meta],
9395 iv_name=iv_name, params={})
9399 _DISK_TEMPLATE_NAME_PREFIX = {
9400 constants.DT_PLAIN: "",
9401 constants.DT_RBD: ".rbd",
9402 constants.DT_EXT: ".ext",
9406 _DISK_TEMPLATE_DEVICE_TYPE = {
9407 constants.DT_PLAIN: constants.LD_LV,
9408 constants.DT_FILE: constants.LD_FILE,
9409 constants.DT_SHARED_FILE: constants.LD_FILE,
9410 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9411 constants.DT_RBD: constants.LD_RBD,
9412 constants.DT_EXT: constants.LD_EXT,
9416 def _GenerateDiskTemplate(
9417 lu, template_name, instance_name, primary_node, secondary_nodes,
9418 disk_info, file_storage_dir, file_driver, base_index,
9419 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9420 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9421 """Generate the entire disk layout for a given template type.
9424 vgname = lu.cfg.GetVGName()
9425 disk_count = len(disk_info)
9428 if template_name == constants.DT_DISKLESS:
9430 elif template_name == constants.DT_DRBD8:
9431 if len(secondary_nodes) != 1:
9432 raise errors.ProgrammerError("Wrong template configuration")
9433 remote_node = secondary_nodes[0]
9434 minors = lu.cfg.AllocateDRBDMinor(
9435 [primary_node, remote_node] * len(disk_info), instance_name)
9437 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9439 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9442 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9443 for i in range(disk_count)]):
9444 names.append(lv_prefix + "_data")
9445 names.append(lv_prefix + "_meta")
9446 for idx, disk in enumerate(disk_info):
9447 disk_index = idx + base_index
9448 data_vg = disk.get(constants.IDISK_VG, vgname)
9449 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9450 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9451 disk[constants.IDISK_SIZE],
9453 names[idx * 2:idx * 2 + 2],
9454 "disk/%d" % disk_index,
9455 minors[idx * 2], minors[idx * 2 + 1])
9456 disk_dev.mode = disk[constants.IDISK_MODE]
9457 disks.append(disk_dev)
9460 raise errors.ProgrammerError("Wrong template configuration")
9462 if template_name == constants.DT_FILE:
9464 elif template_name == constants.DT_SHARED_FILE:
9465 _req_shr_file_storage()
9467 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9468 if name_prefix is None:
9471 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9472 (name_prefix, base_index + i)
9473 for i in range(disk_count)])
9475 if template_name == constants.DT_PLAIN:
9477 def logical_id_fn(idx, _, disk):
9478 vg = disk.get(constants.IDISK_VG, vgname)
9479 return (vg, names[idx])
9481 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9483 lambda _, disk_index, disk: (file_driver,
9484 "%s/disk%d" % (file_storage_dir,
9486 elif template_name == constants.DT_BLOCK:
9488 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9489 disk[constants.IDISK_ADOPT])
9490 elif template_name == constants.DT_RBD:
9491 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9492 elif template_name == constants.DT_EXT:
9493 def logical_id_fn(idx, _, disk):
9494 provider = disk.get(constants.IDISK_PROVIDER, None)
9495 if provider is None:
9496 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9497 " not found" % (constants.DT_EXT,
9498 constants.IDISK_PROVIDER))
9499 return (provider, names[idx])
9501 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9503 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9505 for idx, disk in enumerate(disk_info):
9507 # Only for the Ext template add disk_info to params
9508 if template_name == constants.DT_EXT:
9509 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9511 if key not in constants.IDISK_PARAMS:
9512 params[key] = disk[key]
9513 disk_index = idx + base_index
9514 size = disk[constants.IDISK_SIZE]
9515 feedback_fn("* disk %s, size %s" %
9516 (disk_index, utils.FormatUnit(size, "h")))
9517 disks.append(objects.Disk(dev_type=dev_type, size=size,
9518 logical_id=logical_id_fn(idx, disk_index, disk),
9519 iv_name="disk/%d" % disk_index,
9520 mode=disk[constants.IDISK_MODE],
9526 def _GetInstanceInfoText(instance):
9527 """Compute that text that should be added to the disk's metadata.
9530 return "originstname+%s" % instance.name
9533 def _CalcEta(time_taken, written, total_size):
9534 """Calculates the ETA based on size written and total size.
9536 @param time_taken: The time taken so far
9537 @param written: amount written so far
9538 @param total_size: The total size of data to be written
9539 @return: The remaining time in seconds
9542 avg_time = time_taken / float(written)
9543 return (total_size - written) * avg_time
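# Rough usage sketch (hypothetical numbers): with 512 MiB of 2048 MiB written
# in 30 seconds, _CalcEta(30.0, 512, 2048) returns
# (2048 - 512) * (30.0 / 512) = 90.0 seconds remaining.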
9546 def _WipeDisks(lu, instance, disks=None):
9547 """Wipes instance disks.
9549 @type lu: L{LogicalUnit}
9550 @param lu: the logical unit on whose behalf we execute
9551 @type instance: L{objects.Instance}
9552 @param instance: the instance whose disks we should wipe
9553 @return: the success of the wipe
9556 node = instance.primary_node
9559 disks = [(idx, disk, 0)
9560 for (idx, disk) in enumerate(instance.disks)]
9562 for (_, device, _) in disks:
9563 lu.cfg.SetDiskID(device, node)
9565 logging.info("Pausing synchronization of disks of instance '%s'",
9567 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9568 (map(compat.snd, disks),
9571 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9573 for idx, success in enumerate(result.payload):
9575 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9576 " failed", idx, instance.name)
9579 for (idx, device, offset) in disks:
9580 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9581 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9583 int(min(constants.MAX_WIPE_CHUNK,
9584 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9588 start_time = time.time()
9593 info_text = (" (from %s to %s)" %
9594 (utils.FormatUnit(offset, "h"),
9595 utils.FormatUnit(size, "h")))
9597 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9599 logging.info("Wiping disk %d for instance %s on node %s using"
9600 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9602 while offset < size:
9603 wipe_size = min(wipe_chunk_size, size - offset)
9605 logging.debug("Wiping disk %d, offset %s, chunk %s",
9606 idx, offset, wipe_size)
9608 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9610 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9611 (idx, offset, wipe_size))
9615 if now - last_output >= 60:
9616 eta = _CalcEta(now - start_time, offset, size)
9617 lu.LogInfo(" - done: %.1f%% ETA: %s",
9618 offset / float(size) * 100, utils.FormatSeconds(eta))
9621 logging.info("Resuming synchronization of disks for instance '%s'",
9624 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9625 (map(compat.snd, disks),
9630 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9631 node, result.fail_msg)
9633 for idx, success in enumerate(result.payload):
9635 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9636 " failed", idx, instance.name)
9639 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9640 """Create all disks for an instance.
9642 This abstracts away some work from AddInstance.
9644 @type lu: L{LogicalUnit}
9645 @param lu: the logical unit on whose behalf we execute
9646 @type instance: L{objects.Instance}
9647 @param instance: the instance whose disks we should create
9649 @param to_skip: list of indices to skip
9650 @type target_node: string
9651 @param target_node: if passed, overrides the target node for creation
9653 @return: the success of the creation
9656 info = _GetInstanceInfoText(instance)
9657 if target_node is None:
9658 pnode = instance.primary_node
9659 all_nodes = instance.all_nodes
9664 if instance.disk_template in constants.DTS_FILEBASED:
9665 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9666 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9668 result.Raise("Failed to create directory '%s' on"
9669 " node %s" % (file_storage_dir, pnode))
9671 # Note: this needs to be kept in sync with adding of disks in
9672 # LUInstanceSetParams
9673 for idx, device in enumerate(instance.disks):
9674 if to_skip and idx in to_skip:
9676 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9678 for node in all_nodes:
9679 f_create = node == pnode
9680 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9683 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9684 """Remove all disks for an instance.
9686 This abstracts away some work from `AddInstance()` and
9687 `RemoveInstance()`. Note that in case some of the devices couldn't
9688 be removed, the removal will continue with the other ones (compare
9689 with `_CreateDisks()`).
9691 @type lu: L{LogicalUnit}
9692 @param lu: the logical unit on whose behalf we execute
9693 @type instance: L{objects.Instance}
9694 @param instance: the instance whose disks we should remove
9695 @type target_node: string
9696 @param target_node: used to override the node on which to remove the disks
9698 @return: the success of the removal
9701 logging.info("Removing block devices for instance %s", instance.name)
9704 ports_to_release = set()
9705 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9706 for (idx, device) in enumerate(anno_disks):
9708 edata = [(target_node, device)]
9710 edata = device.ComputeNodeTree(instance.primary_node)
9711 for node, disk in edata:
9712 lu.cfg.SetDiskID(disk, node)
9713 result = lu.rpc.call_blockdev_remove(node, disk)
9715 lu.LogWarning("Could not remove disk %s on node %s,"
9716 " continuing anyway: %s", idx, node, result.fail_msg)
9717 if not (result.offline and node != instance.primary_node):
9720 # if this is a DRBD disk, return its port to the pool
9721 if device.dev_type in constants.LDS_DRBD:
9722 ports_to_release.add(device.logical_id[2])
9724 if all_result or ignore_failures:
9725 for port in ports_to_release:
9726 lu.cfg.AddTcpUdpPort(port)
9728 if instance.disk_template in constants.DTS_FILEBASED:
9729 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9733 tgt = instance.primary_node
9734 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9736 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9737 file_storage_dir, instance.primary_node, result.fail_msg)
9743 def _ComputeDiskSizePerVG(disk_template, disks):
9744 """Compute disk size requirements in the volume group
9747 def _compute(disks, payload):
9748 """Universal algorithm.
9753 vgs[disk[constants.IDISK_VG]] = \
9754 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9758 # Required free disk space as a function of disk and swap space
9760 constants.DT_DISKLESS: {},
9761 constants.DT_PLAIN: _compute(disks, 0),
9762 # 128 MB are added for drbd metadata for each disk
9763 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9764 constants.DT_FILE: {},
9765 constants.DT_SHARED_FILE: {},
9768 if disk_template not in req_size_dict:
9769 raise errors.ProgrammerError("Disk template '%s' size requirement"
9770 " is unknown" % disk_template)
9772 return req_size_dict[disk_template]
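# Result sketch (hypothetical input): for two disks of 1024 and 2048 MiB in
# volume group "xenvg", this returns {"xenvg": 3072} for DT_PLAIN,
# {"xenvg": 3072 + 2 * constants.DRBD_META_SIZE} for DT_DRBD8 (one metadata
# allowance per disk), and {} for the diskless and file-based templates.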
9775 def _FilterVmNodes(lu, nodenames):
9776 """Filters out non-vm_capable nodes from a list.
9778 @type lu: L{LogicalUnit}
9779 @param lu: the logical unit for which we check
9780 @type nodenames: list
9781 @param nodenames: the list of nodes on which we should check
9783 @return: the list of vm-capable nodes
9786 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9787 return [name for name in nodenames if name not in non_vm_nodes]
9790 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9791 """Hypervisor parameter validation.
9793 This function abstracts the hypervisor parameter validation to be
9794 used in both instance create and instance modify.
9796 @type lu: L{LogicalUnit}
9797 @param lu: the logical unit for which we check
9798 @type nodenames: list
9799 @param nodenames: the list of nodes on which we should check
9800 @type hvname: string
9801 @param hvname: the name of the hypervisor we should use
9802 @type hvparams: dict
9803 @param hvparams: the parameters which we need to check
9804 @raise errors.OpPrereqError: if the parameters are not valid
9807 nodenames = _FilterVmNodes(lu, nodenames)
9809 cluster = lu.cfg.GetClusterInfo()
9810 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9812 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9813 for node in nodenames:
9817 info.Raise("Hypervisor parameter validation failed on node %s" % node)
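# Merge sketch (hypothetical values): with cluster-level hvparams
# {"kernel_path": "/boot/vmlinuz"} for hvname and instance-level hvparams
# {"kernel_args": "ro"}, hvfull becomes {"kernel_path": "/boot/vmlinuz",
# "kernel_args": "ro"}, and that merged dict is validated on each remaining
# (vm_capable) node via the RPC above.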
9820 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9821 """OS parameters validation.
9823 @type lu: L{LogicalUnit}
9824 @param lu: the logical unit for which we check
9825 @type required: boolean
9826 @param required: whether the validation should fail if the OS is not found
9828 @type nodenames: list
9829 @param nodenames: the list of nodes on which we should check
9830 @type osname: string
9831 @param osname: the name of the OS we should use
9832 @type osparams: dict
9833 @param osparams: the parameters which we need to check
9834 @raise errors.OpPrereqError: if the parameters are not valid
9837 nodenames = _FilterVmNodes(lu, nodenames)
9838 result = lu.rpc.call_os_validate(nodenames, required, osname,
9839 [constants.OS_VALIDATE_PARAMETERS],
9841 for node, nres in result.items():
9842 # we don't check for offline cases since this should be run only
9843 # against the master node and/or an instance's nodes
9844 nres.Raise("OS Parameters validation failed on node %s" % node)
9845 if not nres.payload:
9846 lu.LogInfo("OS %s not found on node %s, validation skipped",
9850 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9851 """Wrapper around IAReqInstanceAlloc.
9853 @param op: The instance opcode
9854 @param disks: The computed disks
9855 @param nics: The computed nics
9856 @param beparams: The fully filled beparams
9857 @param node_whitelist: List of nodes which should appear as online to the
9858 allocator (unless the node is already marked offline)
9860 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9863 spindle_use = beparams[constants.BE_SPINDLE_USE]
9864 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9865 disk_template=op.disk_template,
9868 vcpus=beparams[constants.BE_VCPUS],
9869 memory=beparams[constants.BE_MAXMEM],
9870 spindle_use=spindle_use,
9872 nics=[n.ToDict() for n in nics],
9873 hypervisor=op.hypervisor,
9874 node_whitelist=node_whitelist)
9877 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9878 """Computes the nics.
9880 @param op: The instance opcode
9881 @param cluster: Cluster configuration object
9882 @param default_ip: The default ip to assign
9883 @param cfg: An instance of the configuration object
9884 @param ec_id: Execution context ID
9886 @returns: The built-up nics
9891 nic_mode_req = nic.get(constants.INIC_MODE, None)
9892 nic_mode = nic_mode_req
9893 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9894 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9896 net = nic.get(constants.INIC_NETWORK, None)
9897 link = nic.get(constants.NIC_LINK, None)
9898 ip = nic.get(constants.INIC_IP, None)
9900 if net is None or net.lower() == constants.VALUE_NONE:
9903 if nic_mode_req is not None or link is not None:
9904 raise errors.OpPrereqError("If network is given, no mode or link"
9905 " is allowed to be passed",
9908 # ip validity checks
9909 if ip is None or ip.lower() == constants.VALUE_NONE:
9911 elif ip.lower() == constants.VALUE_AUTO:
9912 if not op.name_check:
9913 raise errors.OpPrereqError("IP address set to auto but name checks"
9914 " have been skipped",
9918 # We defer pool operations until later, so that the iallocator has
9919 # filled in the instance's node(s)
9920 if ip.lower() == constants.NIC_IP_POOL:
9922 raise errors.OpPrereqError("if ip=pool, parameter network"
9923 " must be passed too",
9926 elif not netutils.IPAddress.IsValid(ip):
9927 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9932 # TODO: check the ip address for uniqueness
9933 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9934 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9937 # MAC address verification
9938 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9939 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9940 mac = utils.NormalizeAndValidateMac(mac)
9943 # TODO: We need to factor this out
9944 cfg.ReserveMAC(mac, ec_id)
9945 except errors.ReservationError:
9946 raise errors.OpPrereqError("MAC address %s already in use"
9947 " in cluster" % mac,
9948 errors.ECODE_NOTUNIQUE)
9950 # Build nic parameters
9953 nicparams[constants.NIC_MODE] = nic_mode
9955 nicparams[constants.NIC_LINK] = link
9957 check_params = cluster.SimpleFillNIC(nicparams)
9958 objects.NIC.CheckParameterSyntax(check_params)
9959 net_uuid = cfg.LookupNetwork(net)
9960 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9961 network=net_uuid, nicparams=nicparams))
9966 def _ComputeDisks(op, default_vg):
9967 """Computes the instance disks.
9969 @param op: The instance opcode
9970 @param default_vg: The default_vg to assume
9972 @return: The computed disks
9976 for disk in op.disks:
9977 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9978 if mode not in constants.DISK_ACCESS_SET:
9979 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9980 mode, errors.ECODE_INVAL)
9981 size = disk.get(constants.IDISK_SIZE, None)
9983 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9986 except (TypeError, ValueError):
9987 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9990 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9991 if ext_provider and op.disk_template != constants.DT_EXT:
9992 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9993 " disk template, not %s" %
9994 (constants.IDISK_PROVIDER, constants.DT_EXT,
9995 op.disk_template), errors.ECODE_INVAL)
9997 data_vg = disk.get(constants.IDISK_VG, default_vg)
9999 constants.IDISK_SIZE: size,
10000 constants.IDISK_MODE: mode,
10001 constants.IDISK_VG: data_vg,
10004 if constants.IDISK_METAVG in disk:
10005 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10006 if constants.IDISK_ADOPT in disk:
10007 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10009 # For extstorage, demand the `provider' option and add any
10010 # additional parameters (ext-params) to the dict
10011 if op.disk_template == constants.DT_EXT:
10013 new_disk[constants.IDISK_PROVIDER] = ext_provider
10015 if key not in constants.IDISK_PARAMS:
10016 new_disk[key] = disk[key]
10018 raise errors.OpPrereqError("Missing provider for template '%s'" %
10019 constants.DT_EXT, errors.ECODE_INVAL)
10021 disks.append(new_disk)
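# Shape of one computed disk (hypothetical values): a 10 GiB read-write LVM
# disk request becomes {IDISK_SIZE: 10240, IDISK_MODE: DISK_RDWR,
# IDISK_VG: "xenvg"}, with IDISK_METAVG/IDISK_ADOPT (and, for DT_EXT,
# IDISK_PROVIDER plus any ext-params) added only when given in the opcode.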
10026 def _ComputeFullBeParams(op, cluster):
10027 """Computes the full beparams.
10029 @param op: The instance opcode
10030 @param cluster: The cluster config object
10032 @return: The fully filled beparams
10035 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10036 for param, value in op.beparams.iteritems():
10037 if value == constants.VALUE_AUTO:
10038 op.beparams[param] = default_beparams[param]
10039 objects.UpgradeBeParams(op.beparams)
10040 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10041 return cluster.SimpleFillBE(op.beparams)
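# Fill-order sketch (hypothetical values): op.beparams = {BE_VCPUS: "auto"}
# first has "auto" replaced by the cluster default, the dict is then upgraded
# (legacy memory -> minmem/maxmem) and type-checked, and SimpleFillBE() layers
# the remaining cluster defaults under the instance-specific values.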
10044 def _CheckOpportunisticLocking(op):
10045 """Generate error if opportunistic locking is not possible.
10048 if op.opportunistic_locking and not op.iallocator:
10049 raise errors.OpPrereqError("Opportunistic locking is only available in"
10050 " combination with an instance allocator",
10051 errors.ECODE_INVAL)
10054 class LUInstanceCreate(LogicalUnit):
10055 """Create an instance.
10058 HPATH = "instance-add"
10059 HTYPE = constants.HTYPE_INSTANCE
10062 def CheckArguments(self):
10063 """Check arguments.
10066 # do not require name_check to ease forward/backward compatibility
10068 if self.op.no_install and self.op.start:
10069 self.LogInfo("No-installation mode selected, disabling startup")
10070 self.op.start = False
10071 # validate/normalize the instance name
10072 self.op.instance_name = \
10073 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10075 if self.op.ip_check and not self.op.name_check:
10076 # TODO: make the ip check more flexible and not depend on the name check
10077 raise errors.OpPrereqError("Cannot do IP address check without a name"
10078 " check", errors.ECODE_INVAL)
10080 # check nics' parameter names
10081 for nic in self.op.nics:
10082 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10084 # check disks: parameter names and consistent adopt/no-adopt strategy
10085 has_adopt = has_no_adopt = False
10086 for disk in self.op.disks:
10087 if self.op.disk_template != constants.DT_EXT:
10088 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10089 if constants.IDISK_ADOPT in disk:
10092 has_no_adopt = True
10093 if has_adopt and has_no_adopt:
10094 raise errors.OpPrereqError("Either all disks are adopted or none is",
10095 errors.ECODE_INVAL)
10097 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10098 raise errors.OpPrereqError("Disk adoption is not supported for the"
10099 " '%s' disk template" %
10100 self.op.disk_template,
10101 errors.ECODE_INVAL)
10102 if self.op.iallocator is not None:
10103 raise errors.OpPrereqError("Disk adoption not allowed with an"
10104 " iallocator script", errors.ECODE_INVAL)
10105 if self.op.mode == constants.INSTANCE_IMPORT:
10106 raise errors.OpPrereqError("Disk adoption not allowed for"
10107 " instance import", errors.ECODE_INVAL)
10109 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10110 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10111 " but no 'adopt' parameter given" %
10112 self.op.disk_template,
10113 errors.ECODE_INVAL)
10115 self.adopt_disks = has_adopt
10117 # instance name verification
10118 if self.op.name_check:
10119 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10120 self.op.instance_name = self.hostname1.name
10121 # used in CheckPrereq for ip ping check
10122 self.check_ip = self.hostname1.ip
10124 self.check_ip = None
10126 # file storage checks
10127 if (self.op.file_driver and
10128 self.op.file_driver not in constants.FILE_DRIVER):
10129 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10130 self.op.file_driver, errors.ECODE_INVAL)
10132 if self.op.disk_template == constants.DT_FILE:
10133 opcodes.RequireFileStorage()
10134 elif self.op.disk_template == constants.DT_SHARED_FILE:
10135 opcodes.RequireSharedFileStorage()
10137 ### Node/iallocator related checks
10138 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10140 if self.op.pnode is not None:
10141 if self.op.disk_template in constants.DTS_INT_MIRROR:
10142 if self.op.snode is None:
10143 raise errors.OpPrereqError("The networked disk templates need"
10144 " a mirror node", errors.ECODE_INVAL)
10145 elif self.op.snode:
10146 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10148 self.op.snode = None
10150 _CheckOpportunisticLocking(self.op)
10152 self._cds = _GetClusterDomainSecret()
10154 if self.op.mode == constants.INSTANCE_IMPORT:
10155 # On import force_variant must be True, because if we forced it at
10156 # initial install, our only chance when importing it back is that it
10158 self.op.force_variant = True
10160 if self.op.no_install:
10161 self.LogInfo("No-installation mode has no effect during import")
10163 elif self.op.mode == constants.INSTANCE_CREATE:
10164 if self.op.os_type is None:
10165 raise errors.OpPrereqError("No guest OS specified",
10166 errors.ECODE_INVAL)
10167 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10168 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10169 " installation" % self.op.os_type,
10170 errors.ECODE_STATE)
10171 if self.op.disk_template is None:
10172 raise errors.OpPrereqError("No disk template specified",
10173 errors.ECODE_INVAL)
10175 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10176 # Check handshake to ensure both clusters have the same domain secret
10177 src_handshake = self.op.source_handshake
10178 if not src_handshake:
10179 raise errors.OpPrereqError("Missing source handshake",
10180 errors.ECODE_INVAL)
10182 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10185 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10186 errors.ECODE_INVAL)
10188 # Load and check source CA
10189 self.source_x509_ca_pem = self.op.source_x509_ca
10190 if not self.source_x509_ca_pem:
10191 raise errors.OpPrereqError("Missing source X509 CA",
10192 errors.ECODE_INVAL)
10195 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10197 except OpenSSL.crypto.Error, err:
10198 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10199 (err, ), errors.ECODE_INVAL)
10201 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10202 if errcode is not None:
10203 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10204 errors.ECODE_INVAL)
10206 self.source_x509_ca = cert
10208 src_instance_name = self.op.source_instance_name
10209 if not src_instance_name:
10210 raise errors.OpPrereqError("Missing source instance name",
10211 errors.ECODE_INVAL)
10213 self.source_instance_name = \
10214 netutils.GetHostname(name=src_instance_name).name
10217 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10218 self.op.mode, errors.ECODE_INVAL)
10220 def ExpandNames(self):
10221 """ExpandNames for CreateInstance.
10223 Figure out the right locks for instance creation.
10226 self.needed_locks = {}
10228 instance_name = self.op.instance_name
10229 # this is just a preventive check, but someone might still add this
10230 # instance in the meantime, and creation will fail at lock-add time
10231 if instance_name in self.cfg.GetInstanceList():
10232 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10233 instance_name, errors.ECODE_EXISTS)
10235 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10237 if self.op.iallocator:
10238 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10239 # specifying a group on instance creation and then selecting nodes from that group
10241 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10242 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10244 if self.op.opportunistic_locking:
10245 self.opportunistic_locks[locking.LEVEL_NODE] = True
10246 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10248 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10249 nodelist = [self.op.pnode]
10250 if self.op.snode is not None:
10251 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10252 nodelist.append(self.op.snode)
10253 self.needed_locks[locking.LEVEL_NODE] = nodelist
10255 # in case of import lock the source node too
10256 if self.op.mode == constants.INSTANCE_IMPORT:
10257 src_node = self.op.src_node
10258 src_path = self.op.src_path
10260 if src_path is None:
10261 self.op.src_path = src_path = self.op.instance_name
10263 if src_node is None:
10264 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10265 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10266 self.op.src_node = None
10267 if os.path.isabs(src_path):
10268 raise errors.OpPrereqError("Importing an instance from a path"
10269 " requires a source node option",
10270 errors.ECODE_INVAL)
10272 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10273 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10274 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10275 if not os.path.isabs(src_path):
10276 self.op.src_path = src_path = \
10277 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10279 self.needed_locks[locking.LEVEL_NODE_RES] = \
10280 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10282 def _RunAllocator(self):
10283 """Run the allocator based on input opcode.
10286 if self.op.opportunistic_locking:
10287 # Only consider nodes for which a lock is held
10288 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10290 node_whitelist = None
10292 #TODO Export network to iallocator so that it chooses a pnode
10293 # in a nodegroup that has the desired network connected to it
10294 req = _CreateInstanceAllocRequest(self.op, self.disks,
10295 self.nics, self.be_full,
10297 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10299 ial.Run(self.op.iallocator)
10301 if not ial.success:
10302 # When opportunistic locks are used only a temporary failure is generated
10303 if self.op.opportunistic_locking:
10304 ecode = errors.ECODE_TEMP_NORES
10306 ecode = errors.ECODE_NORES
10308 raise errors.OpPrereqError("Can't compute nodes using"
10309 " iallocator '%s': %s" %
10310 (self.op.iallocator, ial.info),
10313 self.op.pnode = ial.result[0]
10314 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10315 self.op.instance_name, self.op.iallocator,
10316 utils.CommaJoin(ial.result))
10318 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10320 if req.RequiredNodes() == 2:
10321 self.op.snode = ial.result[1]
10323 def BuildHooksEnv(self):
10324 """Build hooks env.
10326 This runs on master, primary and secondary nodes of the instance.
10330 "ADD_MODE": self.op.mode,
10332 if self.op.mode == constants.INSTANCE_IMPORT:
10333 env["SRC_NODE"] = self.op.src_node
10334 env["SRC_PATH"] = self.op.src_path
10335 env["SRC_IMAGES"] = self.src_images
10337 env.update(_BuildInstanceHookEnv(
10338 name=self.op.instance_name,
10339 primary_node=self.op.pnode,
10340 secondary_nodes=self.secondaries,
10341 status=self.op.start,
10342 os_type=self.op.os_type,
10343 minmem=self.be_full[constants.BE_MINMEM],
10344 maxmem=self.be_full[constants.BE_MAXMEM],
10345 vcpus=self.be_full[constants.BE_VCPUS],
10346 nics=_NICListToTuple(self, self.nics),
10347 disk_template=self.op.disk_template,
10348 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10349 for d in self.disks],
10352 hypervisor_name=self.op.hypervisor,
10358 def BuildHooksNodes(self):
10359 """Build hooks nodes.
10362 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10365 def _ReadExportInfo(self):
10366 """Reads the export information from disk.
10368 It will override the opcode source node and path with the actual
10369 information, if these two were not specified before.
10371 @return: the export information
10374 assert self.op.mode == constants.INSTANCE_IMPORT
10376 src_node = self.op.src_node
10377 src_path = self.op.src_path
10379 if src_node is None:
10380 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10381 exp_list = self.rpc.call_export_list(locked_nodes)
10383 for node in exp_list:
10384 if exp_list[node].fail_msg:
10386 if src_path in exp_list[node].payload:
10388 self.op.src_node = src_node = node
10389 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10393 raise errors.OpPrereqError("No export found for relative path %s" %
10394 src_path, errors.ECODE_INVAL)
10396 _CheckNodeOnline(self, src_node)
10397 result = self.rpc.call_export_info(src_node, src_path)
10398 result.Raise("No export or invalid export found in dir %s" % src_path)
10400 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10401 if not export_info.has_section(constants.INISECT_EXP):
10402 raise errors.ProgrammerError("Corrupted export config",
10403 errors.ECODE_ENVIRON)
10405 ei_version = export_info.get(constants.INISECT_EXP, "version")
10406 if (int(ei_version) != constants.EXPORT_VERSION):
10407 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10408 (ei_version, constants.EXPORT_VERSION),
10409 errors.ECODE_ENVIRON)
10412 def _ReadExportParams(self, einfo):
10413 """Use export parameters as defaults.
10415 If the opcode doesn't specify (i.e. override) some instance
10416 parameters, try to use them from the export information, if
10417 it declares them.
10420 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10422 if self.op.disk_template is None:
10423 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10424 self.op.disk_template = einfo.get(constants.INISECT_INS,
10426 if self.op.disk_template not in constants.DISK_TEMPLATES:
10427 raise errors.OpPrereqError("Disk template specified in configuration"
10428 " file is not one of the allowed values:"
10430 " ".join(constants.DISK_TEMPLATES),
10431 errors.ECODE_INVAL)
10433 raise errors.OpPrereqError("No disk template specified and the export"
10434 " is missing the disk_template information",
10435 errors.ECODE_INVAL)
10437 if not self.op.disks:
10439 # TODO: import the disk iv_name too
10440 for idx in range(constants.MAX_DISKS):
10441 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10442 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10443 disks.append({constants.IDISK_SIZE: disk_sz})
10444 self.op.disks = disks
10445 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10446 raise errors.OpPrereqError("No disk info specified and the export"
10447 " is missing the disk information",
10448 errors.ECODE_INVAL)
10450 if not self.op.nics:
10452 for idx in range(constants.MAX_NICS):
10453 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10455 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10456 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10461 self.op.nics = nics
10463 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10464 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10466 if (self.op.hypervisor is None and
10467 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10468 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10470 if einfo.has_section(constants.INISECT_HYP):
10471 # use the export parameters but do not override the ones
10472 # specified by the user
10473 for name, value in einfo.items(constants.INISECT_HYP):
10474 if name not in self.op.hvparams:
10475 self.op.hvparams[name] = value
10477 if einfo.has_section(constants.INISECT_BEP):
10478 # use the parameters, without overriding
10479 for name, value in einfo.items(constants.INISECT_BEP):
10480 if name not in self.op.beparams:
10481 self.op.beparams[name] = value
10482 # Compatibility for the old "memory" be param
10483 if name == constants.BE_MEMORY:
10484 if constants.BE_MAXMEM not in self.op.beparams:
10485 self.op.beparams[constants.BE_MAXMEM] = value
10486 if constants.BE_MINMEM not in self.op.beparams:
10487 self.op.beparams[constants.BE_MINMEM] = value
10489 # try to read the parameters old style, from the main section
10490 for name in constants.BES_PARAMETERS:
10491 if (name not in self.op.beparams and
10492 einfo.has_option(constants.INISECT_INS, name)):
10493 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10495 if einfo.has_section(constants.INISECT_OSP):
10496 # use the parameters, without overriding
10497 for name, value in einfo.items(constants.INISECT_OSP):
10498 if name not in self.op.osparams:
10499 self.op.osparams[name] = value
10501 def _RevertToDefaults(self, cluster):
10502 """Revert the instance parameters to the default values.
10506 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10507 for name in self.op.hvparams.keys():
10508 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10509 del self.op.hvparams[name]
10511 be_defs = cluster.SimpleFillBE({})
10512 for name in self.op.beparams.keys():
10513 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10514 del self.op.beparams[name]
10516 nic_defs = cluster.SimpleFillNIC({})
10517 for nic in self.op.nics:
10518 for name in constants.NICS_PARAMETERS:
10519 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10522 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10523 for name in self.op.osparams.keys():
10524 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10525 del self.op.osparams[name]
10527 def _CalculateFileStorageDir(self):
10528 """Calculate final instance file storage dir.
10531 # file storage dir calculation/check
10532 self.instance_file_storage_dir = None
10533 if self.op.disk_template in constants.DTS_FILEBASED:
10534 # build the full file storage dir path
10537 if self.op.disk_template == constants.DT_SHARED_FILE:
10538 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10540 get_fsd_fn = self.cfg.GetFileStorageDir
10542 cfg_storagedir = get_fsd_fn()
10543 if not cfg_storagedir:
10544 raise errors.OpPrereqError("Cluster file storage dir not defined",
10545 errors.ECODE_STATE)
10546 joinargs.append(cfg_storagedir)
10548 if self.op.file_storage_dir is not None:
10549 joinargs.append(self.op.file_storage_dir)
10551 joinargs.append(self.op.instance_name)
10553 # pylint: disable=W0142
10554 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
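# Path sketch (hypothetical values): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir = "websrv" and instance
# name "inst1.example.com", the resulting dir is
# "/srv/ganeti/file-storage/websrv/inst1.example.com".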
10556 def CheckPrereq(self): # pylint: disable=R0914
10557 """Check prerequisites.
10560 self._CalculateFileStorageDir()
10562 if self.op.mode == constants.INSTANCE_IMPORT:
10563 export_info = self._ReadExportInfo()
10564 self._ReadExportParams(export_info)
10565 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10567 self._old_instance_name = None
10569 if (not self.cfg.GetVGName() and
10570 self.op.disk_template not in constants.DTS_NOT_LVM):
10571 raise errors.OpPrereqError("Cluster does not support lvm-based"
10572 " instances", errors.ECODE_STATE)
10574 if (self.op.hypervisor is None or
10575 self.op.hypervisor == constants.VALUE_AUTO):
10576 self.op.hypervisor = self.cfg.GetHypervisorType()
10578 cluster = self.cfg.GetClusterInfo()
10579 enabled_hvs = cluster.enabled_hypervisors
10580 if self.op.hypervisor not in enabled_hvs:
10581 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10583 (self.op.hypervisor, ",".join(enabled_hvs)),
10584 errors.ECODE_STATE)
10586 # Check tag validity
10587 for tag in self.op.tags:
10588 objects.TaggableObject.ValidateTag(tag)
10590 # check hypervisor parameter syntax (locally)
10591 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10592 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10594 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10595 hv_type.CheckParameterSyntax(filled_hvp)
10596 self.hv_full = filled_hvp
10597 # check that we don't specify global parameters on an instance
10598 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10599 "instance", "cluster")
10601 # fill and remember the beparams dict
10602 self.be_full = _ComputeFullBeParams(self.op, cluster)
10604 # build os parameters
10605 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10607 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
10609 if self.op.identify_defaults:
10610 self._RevertToDefaults(cluster)
10613 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10614 self.proc.GetECId())
10616 # disk checks/pre-build
10617 default_vg = self.cfg.GetVGName()
10618 self.disks = _ComputeDisks(self.op, default_vg)
10620 if self.op.mode == constants.INSTANCE_IMPORT:
10622 for idx in range(len(self.disks)):
10623 option = "disk%d_dump" % idx
10624 if export_info.has_option(constants.INISECT_INS, option):
10625 # FIXME: are the old os-es, disk sizes, etc. useful?
10626 export_name = export_info.get(constants.INISECT_INS, option)
10627 image = utils.PathJoin(self.op.src_path, export_name)
10628 disk_images.append(image)
10630 disk_images.append(False)
10632 self.src_images = disk_images
10634 if self.op.instance_name == self._old_instance_name:
10635 for idx, nic in enumerate(self.nics):
10636 if nic.mac == constants.VALUE_AUTO:
10637 nic_mac_ini = "nic%d_mac" % idx
10638 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10640 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10642 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10643 if self.op.ip_check:
10644 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10645 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10646 (self.check_ip, self.op.instance_name),
10647 errors.ECODE_NOTUNIQUE)
10649 #### mac address generation
10650 # By generating here the mac address both the allocator and the hooks get
10651 # the real final mac address rather than the 'auto' or 'generate' value.
10652 # There is a race condition between the generation and the instance object
10653 # creation, which means that we know the mac is valid now, but we're not
10654 # sure it will be when we actually add the instance. If things go bad
10655 # adding the instance will abort because of a duplicate mac, and the
10656 # creation job will fail.
10657 for nic in self.nics:
10658 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10659 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10663 if self.op.iallocator is not None:
10664 self._RunAllocator()
10666 # Release all unneeded node locks
10667 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10668 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10669 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10670 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10672 assert (self.owned_locks(locking.LEVEL_NODE) ==
10673 self.owned_locks(locking.LEVEL_NODE_RES)), \
10674 "Node locks differ from node resource locks"
10676 #### node related checks
10678 # check primary node
10679 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10680 assert self.pnode is not None, \
10681 "Cannot retrieve locked node %s" % self.op.pnode
10683 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10684 pnode.name, errors.ECODE_STATE)
10686 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10687 pnode.name, errors.ECODE_STATE)
10688 if not pnode.vm_capable:
10689 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10690 " '%s'" % pnode.name, errors.ECODE_STATE)
10692 self.secondaries = []
10694 # Fill in any IPs from IP pools. This must happen here, because we need to
10695 # know the nic's primary node, as specified by the iallocator
10696 for idx, nic in enumerate(self.nics):
10697 net_uuid = nic.network
10698 if net_uuid is not None:
10699 nobj = self.cfg.GetNetwork(net_uuid)
10700 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10701 if netparams is None:
10702 raise errors.OpPrereqError("No netparams found for network"
10703 " %s. Propably not connected to"
10704 " node's %s nodegroup" %
10705 (nobj.name, self.pnode.name),
10706 errors.ECODE_INVAL)
10707 self.LogInfo("NIC/%d inherits netparams %s" %
10708 (idx, netparams.values()))
10709 nic.nicparams = dict(netparams)
10710 if nic.ip is not None:
10711 if nic.ip.lower() == constants.NIC_IP_POOL:
10713 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10714 except errors.ReservationError:
10715 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10716 " from the address pool" % idx,
10717 errors.ECODE_STATE)
10718 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10721 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10722 except errors.ReservationError:
10723 raise errors.OpPrereqError("IP address %s already in use"
10724 " or does not belong to network %s" %
10725 (nic.ip, nobj.name),
10726 errors.ECODE_NOTUNIQUE)
10728 # net is None, ip None or given
10729 elif self.op.conflicts_check:
10730 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10732 # mirror node verification
10733 if self.op.disk_template in constants.DTS_INT_MIRROR:
10734 if self.op.snode == pnode.name:
10735 raise errors.OpPrereqError("The secondary node cannot be the"
10736 " primary node", errors.ECODE_INVAL)
10737 _CheckNodeOnline(self, self.op.snode)
10738 _CheckNodeNotDrained(self, self.op.snode)
10739 _CheckNodeVmCapable(self, self.op.snode)
10740 self.secondaries.append(self.op.snode)
10742 snode = self.cfg.GetNodeInfo(self.op.snode)
10743 if pnode.group != snode.group:
10744 self.LogWarning("The primary and secondary nodes are in two"
10745 " different node groups; the disk parameters"
10746 " from the first disk's node group will be"
10749 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10751 if self.op.disk_template in constants.DTS_INT_MIRROR:
10752 nodes.append(snode)
10753 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10754 if compat.any(map(has_es, nodes)):
10755 raise errors.OpPrereqError("Disk template %s not supported with"
10756 " exclusive storage" % self.op.disk_template,
10757 errors.ECODE_STATE)
10759 nodenames = [pnode.name] + self.secondaries
10761 # Verify instance specs
10762 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10764 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10765 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10766 constants.ISPEC_DISK_COUNT: len(self.disks),
10767 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10768 constants.ISPEC_NIC_COUNT: len(self.nics),
10769 constants.ISPEC_SPINDLE_USE: spindle_use,
10772 group_info = self.cfg.GetNodeGroup(pnode.group)
10773 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10774 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10775 if not self.op.ignore_ipolicy and res:
10776 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10777 (pnode.group, group_info.name, utils.CommaJoin(res)))
10778 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10780 if not self.adopt_disks:
10781 if self.op.disk_template == constants.DT_RBD:
10782 # _CheckRADOSFreeSpace() is just a placeholder.
10783 # Any function that checks prerequisites can be placed here.
10784 # Check if there is enough space on the RADOS cluster.
10785 _CheckRADOSFreeSpace()
10786 elif self.op.disk_template == constants.DT_EXT:
10787 # FIXME: Function that checks prereqs if needed
10790 # Check lv size requirements, if not adopting
10791 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10792 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10794 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10795 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10796 disk[constants.IDISK_ADOPT])
10797 for disk in self.disks])
10798 if len(all_lvs) != len(self.disks):
10799 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10800 errors.ECODE_INVAL)
10801 for lv_name in all_lvs:
10803 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10804 # to ReserveLV use the same syntax
10805 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10806 except errors.ReservationError:
10807 raise errors.OpPrereqError("LV named %s used by another instance" %
10808 lv_name, errors.ECODE_NOTUNIQUE)
10810 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10811 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10813 node_lvs = self.rpc.call_lv_list([pnode.name],
10814 vg_names.payload.keys())[pnode.name]
10815 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10816 node_lvs = node_lvs.payload
10818 delta = all_lvs.difference(node_lvs.keys())
10820 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10821 utils.CommaJoin(delta),
10822 errors.ECODE_INVAL)
10823 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10825 raise errors.OpPrereqError("Online logical volumes found, cannot"
10826 " adopt: %s" % utils.CommaJoin(online_lvs),
10827 errors.ECODE_STATE)
10828 # update the size of disk based on what is found
10829 for dsk in self.disks:
10830 dsk[constants.IDISK_SIZE] = \
10831 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10832 dsk[constants.IDISK_ADOPT])][0]))
10834 elif self.op.disk_template == constants.DT_BLOCK:
10835 # Normalize and de-duplicate device paths
10836 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10837 for disk in self.disks])
10838 if len(all_disks) != len(self.disks):
10839 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10840 errors.ECODE_INVAL)
10841 baddisks = [d for d in all_disks
10842 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10844 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10845 " cannot be adopted" %
10846 (utils.CommaJoin(baddisks),
10847 constants.ADOPTABLE_BLOCKDEV_ROOT),
10848 errors.ECODE_INVAL)
10850 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10851 list(all_disks))[pnode.name]
10852 node_disks.Raise("Cannot get block device information from node %s" %
10854 node_disks = node_disks.payload
10855 delta = all_disks.difference(node_disks.keys())
10857 raise errors.OpPrereqError("Missing block device(s): %s" %
10858 utils.CommaJoin(delta),
10859 errors.ECODE_INVAL)
10860 for dsk in self.disks:
10861 dsk[constants.IDISK_SIZE] = \
10862 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10864 # Verify instance specs
10865 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10867 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10868 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10869 constants.ISPEC_DISK_COUNT: len(self.disks),
10870 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10871 for disk in self.disks],
10872 constants.ISPEC_NIC_COUNT: len(self.nics),
10873 constants.ISPEC_SPINDLE_USE: spindle_use,
10876 group_info = self.cfg.GetNodeGroup(pnode.group)
10877 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10878 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10879 if not self.op.ignore_ipolicy and res:
10880 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10881 " policy: %s") % (pnode.group,
10882 utils.CommaJoin(res)),
10883 errors.ECODE_INVAL)
10885 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10887 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10888 # check OS parameters (remotely)
10889 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10891 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10893 #TODO: _CheckExtParams (remotely)
10894 # Check parameters for extstorage
10896 # memory check on primary node
10897 #TODO(dynmem): use MINMEM for checking
10899 _CheckNodeFreeMemory(self, self.pnode.name,
10900 "creating instance %s" % self.op.instance_name,
10901 self.be_full[constants.BE_MAXMEM],
10902 self.op.hypervisor)
10904 self.dry_run_result = list(nodenames)
10906 def Exec(self, feedback_fn):
10907 """Create and add the instance to the cluster.
10910 instance = self.op.instance_name
10911 pnode_name = self.pnode.name
10913 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10914 self.owned_locks(locking.LEVEL_NODE)), \
10915 "Node locks differ from node resource locks"
10916 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10918 ht_kind = self.op.hypervisor
10919 if ht_kind in constants.HTS_REQ_PORT:
10920 network_port = self.cfg.AllocatePort()
10922 network_port = None
10924 # This is ugly, but we have a chicken-and-egg problem here
10925 # We can only take the group disk parameters, as the instance
10926 # has no disks yet (we are generating them right here).
10927 node = self.cfg.GetNodeInfo(pnode_name)
10928 nodegroup = self.cfg.GetNodeGroup(node.group)
10929 disks = _GenerateDiskTemplate(self,
10930 self.op.disk_template,
10931 instance, pnode_name,
10934 self.instance_file_storage_dir,
10935 self.op.file_driver,
10938 self.cfg.GetGroupDiskParams(nodegroup))
10940 iobj = objects.Instance(name=instance, os=self.op.os_type,
10941 primary_node=pnode_name,
10942 nics=self.nics, disks=disks,
10943 disk_template=self.op.disk_template,
10944 admin_state=constants.ADMINST_DOWN,
10945 network_port=network_port,
10946 beparams=self.op.beparams,
10947 hvparams=self.op.hvparams,
10948 hypervisor=self.op.hypervisor,
10949 osparams=self.op.osparams,
10953 for tag in self.op.tags:
10956 if self.adopt_disks:
10957 if self.op.disk_template == constants.DT_PLAIN:
10958 # rename LVs to the newly-generated names; we need to construct
10959 # 'fake' LV disks with the old data, plus the new unique_id
10960 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10962 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10963 rename_to.append(t_dsk.logical_id)
10964 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10965 self.cfg.SetDiskID(t_dsk, pnode_name)
10966 result = self.rpc.call_blockdev_rename(pnode_name,
10967 zip(tmp_disks, rename_to))
10968 result.Raise("Failed to rename adoped LVs")
10970 feedback_fn("* creating instance disks...")
10972 _CreateDisks(self, iobj)
10973 except errors.OpExecError:
10974 self.LogWarning("Device creation failed, reverting...")
10976 _RemoveDisks(self, iobj)
10978 self.cfg.ReleaseDRBDMinors(instance)
10981 feedback_fn("adding instance %s to cluster config" % instance)
10983 self.cfg.AddInstance(iobj, self.proc.GetECId())
10985 # Declare that we don't want to remove the instance lock anymore, as we've
10986 # added the instance to the config
10987 del self.remove_locks[locking.LEVEL_INSTANCE]
10989 if self.op.mode == constants.INSTANCE_IMPORT:
10990 # Release unused nodes
10991 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10993 # Release all nodes
10994 _ReleaseLocks(self, locking.LEVEL_NODE)
10997 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10998 feedback_fn("* wiping instance disks...")
11000 _WipeDisks(self, iobj)
11001 except errors.OpExecError, err:
11002 logging.exception("Wiping disks failed")
11003 self.LogWarning("Wiping instance disks failed (%s)", err)
11007 # Something is already wrong with the disks, don't do anything else
11009 elif self.op.wait_for_sync:
11010 disk_abort = not _WaitForSync(self, iobj)
11011 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11012 # make sure the disks are not degraded (still sync-ing is ok)
11013 feedback_fn("* checking mirrors status")
11014 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11019 _RemoveDisks(self, iobj)
11020 self.cfg.RemoveInstance(iobj.name)
11021 # Make sure the instance lock gets removed
11022 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11023 raise errors.OpExecError("There are some degraded disks for"
11026 # Release all node resource locks
11027 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11029 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11030 # we need to set the disks ID to the primary node, since the
11031 # preceding code might or might not have done it, depending on
11032 # disk template and other options
11033 for disk in iobj.disks:
11034 self.cfg.SetDiskID(disk, pnode_name)
11035 if self.op.mode == constants.INSTANCE_CREATE:
11036 if not self.op.no_install:
11037 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11038 not self.op.wait_for_sync)
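# Pausing the DRBD sync while the OS is installed keeps the initial resync
# from competing with the installation I/O; the sync is resumed right after
# the OS scripts have finished.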
11040 feedback_fn("* pausing disk sync to install instance OS")
11041 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11044 for idx, success in enumerate(result.payload):
11046 logging.warn("pause-sync of instance %s for disk %d failed",
11049 feedback_fn("* running the instance OS create scripts...")
11050 # FIXME: pass debug option from opcode to backend
11052 os_add_result = self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11053 self.op.debug_level)
11055 feedback_fn("* resuming disk sync")
11056 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11059 for idx, success in enumerate(result.payload):
11061 logging.warn("resume-sync of instance %s for disk %d failed",
11064 os_add_result.Raise("Could not add os for instance %s"
11065 " on node %s" % (instance, pnode_name))
11068 if self.op.mode == constants.INSTANCE_IMPORT:
11069 feedback_fn("* running the instance OS import scripts...")
11073 for idx, image in enumerate(self.src_images):
11077 # FIXME: pass debug option from opcode to backend
11078 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11079 constants.IEIO_FILE, (image, ),
11080 constants.IEIO_SCRIPT,
11081 (iobj.disks[idx], idx),
11083 transfers.append(dt)
11086 masterd.instance.TransferInstanceData(self, feedback_fn,
11087 self.op.src_node, pnode_name,
11088 self.pnode.secondary_ip,
11090 if not compat.all(import_result):
11091 self.LogWarning("Some disks for instance %s on node %s were not"
11092 " imported successfully" % (instance, pnode_name))
11094 rename_from = self._old_instance_name
11096 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11097 feedback_fn("* preparing remote import...")
11098 # The source cluster will stop the instance before attempting to make
11099 # a connection. In some cases stopping an instance can take a long
11100 # time, hence the shutdown timeout is added to the connection timeout.
11102 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11103 self.op.source_shutdown_timeout)
11104 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11106 assert iobj.primary_node == self.pnode.name
11108 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11109 self.source_x509_ca,
11110 self._cds, timeouts)
11111 if not compat.all(disk_results):
11112 # TODO: Should the instance still be started, even if some disks
11113 # failed to import (valid for local imports, too)?
11114 self.LogWarning("Some disks for instance %s on node %s were not"
11115 " imported successfully" % (instance, pnode_name))
11117 rename_from = self.source_instance_name
11120 # also checked in the prereq part
11121 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11124 # Run rename script on newly imported instance
11125 assert iobj.name == instance
11126 feedback_fn("Running rename script for %s" % instance)
11127 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11129 self.op.debug_level)
11130 if result.fail_msg:
11131 self.LogWarning("Failed to run rename script for %s on node"
11132 " %s: %s" % (instance, pnode_name, result.fail_msg))
11134 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11137 iobj.admin_state = constants.ADMINST_UP
11138 self.cfg.Update(iobj, feedback_fn)
11139 logging.info("Starting instance %s on node %s", instance, pnode_name)
11140 feedback_fn("* starting instance...")
11141 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11143 result.Raise("Could not start instance")
11145 return list(iobj.all_nodes)
11148 class LUInstanceMultiAlloc(NoHooksLU):
11149 """Allocates multiple instances at the same time.
11154 def CheckArguments(self):
11155 """Check arguments.
11159 for inst in self.op.instances:
11160 if inst.iallocator is not None:
11161 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11162 " instance objects", errors.ECODE_INVAL)
11163 nodes.append(bool(inst.pnode))
11164 if inst.disk_template in constants.DTS_INT_MIRROR:
11165 nodes.append(bool(inst.snode))
11167 has_nodes = compat.any(nodes)
11168 if compat.all(nodes) ^ has_nodes:
11169 raise errors.OpPrereqError("There are instance objects providing"
11170 " pnode/snode while others do not",
11171 errors.ECODE_INVAL)
11173 if self.op.iallocator is None:
11174 default_iallocator = self.cfg.GetDefaultIAllocator()
11175 if default_iallocator and has_nodes:
11176 self.op.iallocator = default_iallocator
11178 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11179 " given and no cluster-wide default"
11180 " iallocator found; please specify either"
11181 " an iallocator or nodes on the instances"
11182 " or set a cluster-wide default iallocator",
11183 errors.ECODE_INVAL)
11185 _CheckOpportunisticLocking(self.op)
11187 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11189 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11190 utils.CommaJoin(dups), errors.ECODE_INVAL)
11192 def ExpandNames(self):
11193 """Calculate the locks.
11196 self.share_locks = _ShareAll()
11197 self.needed_locks = {
11198 # The iallocator will select nodes; even if no iallocator is used,
11199 # collisions with LUInstanceCreate should be avoided
11200 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11203 if self.op.iallocator:
11204 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11205 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11207 if self.op.opportunistic_locking:
11208 self.opportunistic_locks[locking.LEVEL_NODE] = True
11209 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11212 for inst in self.op.instances:
11213 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11214 nodeslist.append(inst.pnode)
11215 if inst.snode is not None:
11216 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11217 nodeslist.append(inst.snode)
11219 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11220 # Lock resources of instance's primary and secondary nodes (copy to
11221 # prevent accidental modification)
11222 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11224 def CheckPrereq(self):
11225 """Check prerequisite.
11228 cluster = self.cfg.GetClusterInfo()
11229 default_vg = self.cfg.GetVGName()
11230 ec_id = self.proc.GetECId()
11232 if self.op.opportunistic_locking:
11233 # Only consider nodes for which a lock is held
11234 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11236 node_whitelist = None
11238 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11239 _ComputeNics(op, cluster, None,
11241 _ComputeFullBeParams(op, cluster),
11243 for op in self.op.instances]
11245 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11246 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11248 ial.Run(self.op.iallocator)
11250 if not ial.success:
11251 raise errors.OpPrereqError("Can't compute nodes using"
11252 " iallocator '%s': %s" %
11253 (self.op.iallocator, ial.info),
11254 errors.ECODE_NORES)
11256 self.ia_result = ial.result
11258 if self.op.dry_run:
11259 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11260 constants.JOB_IDS_KEY: [],
11263 def _ConstructPartialResult(self):
11264 """Contructs the partial result.
11267 (allocatable, failed) = self.ia_result
11269 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11270 map(compat.fst, allocatable),
11271 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11274 def Exec(self, feedback_fn):
11275 """Executes the opcode.
11278 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11279 (allocatable, failed) = self.ia_result
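# ia_result is a pair: a list of (instance_name, [node, ...]) tuples for the
# instances that could be allocated, and a list of names that could not.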
11282 for (name, nodes) in allocatable:
11283 op = op2inst.pop(name)
11286 (op.pnode, op.snode) = nodes
11288 (op.pnode,) = nodes
11292 missing = set(op2inst.keys()) - set(failed)
11293 assert not missing, \
11294 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11296 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11299 def _CheckRADOSFreeSpace():
11300 """Compute disk size requirements inside the RADOS cluster.
11303 # For the RADOS cluster we assume there is always enough space.
11307 class LUInstanceConsole(NoHooksLU):
11308 """Connect to an instance's console.
11310 This is somewhat special in that it returns the command line that
11311 you need to run on the master node in order to connect to the console.
11317 def ExpandNames(self):
11318 self.share_locks = _ShareAll()
11319 self._ExpandAndLockInstance()
11321 def CheckPrereq(self):
11322 """Check prerequisites.
11324 This checks that the instance is in the cluster.
11327 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11328 assert self.instance is not None, \
11329 "Cannot retrieve locked instance %s" % self.op.instance_name
11330 _CheckNodeOnline(self, self.instance.primary_node)
11332 def Exec(self, feedback_fn):
11333 """Connect to the console of an instance
11336 instance = self.instance
11337 node = instance.primary_node
11339 node_insts = self.rpc.call_instance_list([node],
11340 [instance.hypervisor])[node]
11341 node_insts.Raise("Can't get node information from %s" % node)
11343 if instance.name not in node_insts.payload:
11344 if instance.admin_state == constants.ADMINST_UP:
11345 state = constants.INSTST_ERRORDOWN
11346 elif instance.admin_state == constants.ADMINST_DOWN:
11347 state = constants.INSTST_ADMINDOWN
11349 state = constants.INSTST_ADMINOFFLINE
11350 raise errors.OpExecError("Instance %s is not running (state %s)" %
11351 (instance.name, state))
11353 logging.debug("Connecting to console of %s on %s", instance.name, node)
11355 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11358 def _GetInstanceConsole(cluster, instance):
11359 """Returns console information for an instance.
11361 @type cluster: L{objects.Cluster}
11362 @type instance: L{objects.Instance}
11366 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11367 # beparams and hvparams are passed separately, to avoid editing the
11368 # instance and then saving the defaults in the instance itself.
11369 hvparams = cluster.FillHV(instance)
11370 beparams = cluster.FillBE(instance)
11371 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11373 assert console.instance == instance.name
11374 assert console.Validate()
11376 return console.ToDict()
11379 class LUInstanceReplaceDisks(LogicalUnit):
11380 """Replace the disks of an instance.
11383 HPATH = "mirrors-replace"
11384 HTYPE = constants.HTYPE_INSTANCE
11387 def CheckArguments(self):
11388 """Check arguments.
11391 remote_node = self.op.remote_node
11392 ialloc = self.op.iallocator
11393 if self.op.mode == constants.REPLACE_DISK_CHG:
11394 if remote_node is None and ialloc is None:
11395 raise errors.OpPrereqError("When changing the secondary either an"
11396 " iallocator script must be used or the"
11397 " new node given", errors.ECODE_INVAL)
11399 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11401 elif remote_node is not None or ialloc is not None:
11402 # Not replacing the secondary
11403 raise errors.OpPrereqError("The iallocator and new node options can"
11404 " only be used when changing the"
11405 " secondary node", errors.ECODE_INVAL)
11407 def ExpandNames(self):
11408 self._ExpandAndLockInstance()
11410 assert locking.LEVEL_NODE not in self.needed_locks
11411 assert locking.LEVEL_NODE_RES not in self.needed_locks
11412 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11414 assert self.op.iallocator is None or self.op.remote_node is None, \
11415 "Conflicting options"
11417 if self.op.remote_node is not None:
11418 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11420 # Warning: do not remove the locking of the new secondary here
11421 # unless DRBD8.AddChildren is changed to work in parallel;
11422 # currently it doesn't since parallel invocations of
11423 # FindUnusedMinor will conflict
11424 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11425 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11427 self.needed_locks[locking.LEVEL_NODE] = []
11428 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11430 if self.op.iallocator is not None:
11431 # iallocator will select a new node in the same group
11432 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11433 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11435 self.needed_locks[locking.LEVEL_NODE_RES] = []
11437 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11438 self.op.iallocator, self.op.remote_node,
11439 self.op.disks, self.op.early_release,
11440 self.op.ignore_ipolicy)
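# All the actual disk replacement work is delegated to the TLReplaceDisks
# tasklet below; this LU only deals with locking, hooks and argument checks.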
11442 self.tasklets = [self.replacer]
11444 def DeclareLocks(self, level):
11445 if level == locking.LEVEL_NODEGROUP:
11446 assert self.op.remote_node is None
11447 assert self.op.iallocator is not None
11448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11450 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11451 # Lock all groups used by instance optimistically; this requires going
11452 # via the node before it's locked, requiring verification later on
11453 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11454 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11456 elif level == locking.LEVEL_NODE:
11457 if self.op.iallocator is not None:
11458 assert self.op.remote_node is None
11459 assert not self.needed_locks[locking.LEVEL_NODE]
11460 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11462 # Lock member nodes of all locked groups
11463 self.needed_locks[locking.LEVEL_NODE] = \
11465 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11466 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11468 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11470 self._LockInstancesNodes()
11472 elif level == locking.LEVEL_NODE_RES:
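# The node resource locks simply mirror the regular node locks computed at
# the LEVEL_NODE stage.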
11474 self.needed_locks[locking.LEVEL_NODE_RES] = \
11475 self.needed_locks[locking.LEVEL_NODE]
11477 def BuildHooksEnv(self):
11478 """Build hooks env.
11480 This runs on the master, the primary and all the secondaries.
11483 instance = self.replacer.instance
11485 "MODE": self.op.mode,
11486 "NEW_SECONDARY": self.op.remote_node,
11487 "OLD_SECONDARY": instance.secondary_nodes[0],
11489 env.update(_BuildInstanceHookEnvByObject(self, instance))
11492 def BuildHooksNodes(self):
11493 """Build hooks nodes.
11496 instance = self.replacer.instance
11498 self.cfg.GetMasterNode(),
11499 instance.primary_node,
11501 if self.op.remote_node is not None:
11502 nl.append(self.op.remote_node)
11505 def CheckPrereq(self):
11506 """Check prerequisites.
11509 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11510 self.op.iallocator is None)
11512 # Verify if node group locks are still correct
11513 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11515 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11517 return LogicalUnit.CheckPrereq(self)
11520 class TLReplaceDisks(Tasklet):
11521 """Replaces disks for an instance.
11523 Note: Locking is not within the scope of this class.
11526 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11527 disks, early_release, ignore_ipolicy):
11528 """Initializes this class.
11531 Tasklet.__init__(self, lu)
11534 self.instance_name = instance_name
11536 self.iallocator_name = iallocator_name
11537 self.remote_node = remote_node
11539 self.early_release = early_release
11540 self.ignore_ipolicy = ignore_ipolicy
11543 self.instance = None
11544 self.new_node = None
11545 self.target_node = None
11546 self.other_node = None
11547 self.remote_node_info = None
11548 self.node_secondary_ip = None
11551 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11552 """Compute a new secondary node using an IAllocator.
11555 req = iallocator.IAReqRelocate(name=instance_name,
11556 relocate_from=list(relocate_from))
11557 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11559 ial.Run(iallocator_name)
11561 if not ial.success:
11562 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11563 " %s" % (iallocator_name, ial.info),
11564 errors.ECODE_NORES)
11566 remote_node_name = ial.result[0]
11568 lu.LogInfo("Selected new secondary for instance '%s': %s",
11569 instance_name, remote_node_name)
11571 return remote_node_name
11573 def _FindFaultyDisks(self, node_name):
11574 """Wrapper for L{_FindFaultyInstanceDisks}.
11577 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11580 def _CheckDisksActivated(self, instance):
11581 """Checks if the instance disks are activated.
11583 @param instance: The instance whose disks to check
11584 @return: True if they are activated, False otherwise
11587 nodes = instance.all_nodes
11589 for idx, dev in enumerate(instance.disks):
11591 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11592 self.cfg.SetDiskID(dev, node)
11594 result = _BlockdevFind(self, node, dev, instance)
11598 elif result.fail_msg or not result.payload:
11603 def CheckPrereq(self):
11604 """Check prerequisites.
11606 This checks that the instance is in the cluster.
11609 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11610 assert instance is not None, \
11611 "Cannot retrieve locked instance %s" % self.instance_name
11613 if instance.disk_template != constants.DT_DRBD8:
11614 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11615 " instances", errors.ECODE_INVAL)
11617 if len(instance.secondary_nodes) != 1:
11618 raise errors.OpPrereqError("The instance has a strange layout,"
11619 " expected one secondary but found %d" %
11620 len(instance.secondary_nodes),
11621 errors.ECODE_FAULT)
11623 instance = self.instance
11624 secondary_node = instance.secondary_nodes[0]
11626 if self.iallocator_name is None:
11627 remote_node = self.remote_node
11629 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11630 instance.name, instance.secondary_nodes)
11632 if remote_node is None:
11633 self.remote_node_info = None
11635 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11636 "Remote node '%s' is not locked" % remote_node
11638 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11639 assert self.remote_node_info is not None, \
11640 "Cannot retrieve locked node %s" % remote_node
11642 if remote_node == self.instance.primary_node:
11643 raise errors.OpPrereqError("The specified node is the primary node of"
11644 " the instance", errors.ECODE_INVAL)
11646 if remote_node == secondary_node:
11647 raise errors.OpPrereqError("The specified node is already the"
11648 " secondary node of the instance",
11649 errors.ECODE_INVAL)
11651 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11652 constants.REPLACE_DISK_CHG):
11653 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11654 errors.ECODE_INVAL)
11656 if self.mode == constants.REPLACE_DISK_AUTO:
11657 if not self._CheckDisksActivated(instance):
11658 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11659 " first" % self.instance_name,
11660 errors.ECODE_STATE)
11661 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11662 faulty_secondary = self._FindFaultyDisks(secondary_node)
11664 if faulty_primary and faulty_secondary:
11665 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11666 " one node and can not be repaired"
11667 " automatically" % self.instance_name,
11668 errors.ECODE_STATE)
11671 self.disks = faulty_primary
11672 self.target_node = instance.primary_node
11673 self.other_node = secondary_node
11674 check_nodes = [self.target_node, self.other_node]
11675 elif faulty_secondary:
11676 self.disks = faulty_secondary
11677 self.target_node = secondary_node
11678 self.other_node = instance.primary_node
11679 check_nodes = [self.target_node, self.other_node]
11685 # Non-automatic modes
11686 if self.mode == constants.REPLACE_DISK_PRI:
11687 self.target_node = instance.primary_node
11688 self.other_node = secondary_node
11689 check_nodes = [self.target_node, self.other_node]
11691 elif self.mode == constants.REPLACE_DISK_SEC:
11692 self.target_node = secondary_node
11693 self.other_node = instance.primary_node
11694 check_nodes = [self.target_node, self.other_node]
11696 elif self.mode == constants.REPLACE_DISK_CHG:
11697 self.new_node = remote_node
11698 self.other_node = instance.primary_node
11699 self.target_node = secondary_node
11700 check_nodes = [self.new_node, self.other_node]
11702 _CheckNodeNotDrained(self.lu, remote_node)
11703 _CheckNodeVmCapable(self.lu, remote_node)
11705 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11706 assert old_node_info is not None
11707 if old_node_info.offline and not self.early_release:
11708 # doesn't make sense to delay the release
11709 self.early_release = True
11710 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11711 " early-release mode", secondary_node)
11714 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11717 # If not specified all disks should be replaced
11719 self.disks = range(len(self.instance.disks))
11721 # TODO: This is ugly, but right now we can't distinguish between internal
11722 # submitted opcode and external one. We should fix that.
11723 if self.remote_node_info:
11724 # We change the node, lets verify it still meets instance policy
11725 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11726 cluster = self.cfg.GetClusterInfo()
11727 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11729 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11730 ignore=self.ignore_ipolicy)
11732 for node in check_nodes:
11733 _CheckNodeOnline(self.lu, node)
11735 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11738 if node_name is not None)
11740 # Release unneeded node and node resource locks
11741 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11742 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11745 # Release any owned node group
11746 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11748 # Check whether disks are valid
11749 for disk_idx in self.disks:
11750 instance.FindDisk(disk_idx)
11752 # Get secondary node IP addresses
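# (these are needed later when reconfiguring the DRBD network links between
# the primary and the (new) secondary node)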
11753 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11754 in self.cfg.GetMultiNodeInfo(touched_nodes))
11756 def Exec(self, feedback_fn):
11757 """Execute disk replacement.
11759 This dispatches the disk replacement to the appropriate handler.
11763 # Verify owned locks before starting operation
11764 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11765 assert set(owned_nodes) == set(self.node_secondary_ip), \
11766 ("Incorrect node locks, owning %s, expected %s" %
11767 (owned_nodes, self.node_secondary_ip.keys()))
11768 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11769 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11770 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11772 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11773 assert list(owned_instances) == [self.instance_name], \
11774 "Instance '%s' not locked" % self.instance_name
11776 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11777 "Should not own any node group lock at this point"
11780 feedback_fn("No disks need replacement for instance '%s'" %
11781 self.instance.name)
11784 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11785 (utils.CommaJoin(self.disks), self.instance.name))
11786 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11787 feedback_fn("Current seconary node: %s" %
11788 utils.CommaJoin(self.instance.secondary_nodes))
11790 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11792 # Activate the instance disks if we're replacing them on a down instance
11794 _StartInstanceDisks(self.lu, self.instance, True)
11797 # Should we replace the secondary node?
11798 if self.new_node is not None:
11799 fn = self._ExecDrbd8Secondary
11801 fn = self._ExecDrbd8DiskOnly
11803 result = fn(feedback_fn)
11805 # Deactivate the instance disks if we're replacing them on a
11808 _SafeShutdownInstanceDisks(self.lu, self.instance)
11810 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11813 # Verify owned locks
11814 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11815 nodes = frozenset(self.node_secondary_ip)
11816 assert ((self.early_release and not owned_nodes) or
11817 (not self.early_release and not (set(owned_nodes) - nodes))), \
11818 ("Not owning the correct locks, early_release=%s, owned=%r,"
11819 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11823 def _CheckVolumeGroup(self, nodes):
11824 self.lu.LogInfo("Checking volume groups")
11826 vgname = self.cfg.GetVGName()
11828 # Make sure volume group exists on all involved nodes
11829 results = self.rpc.call_vg_list(nodes)
11831 raise errors.OpExecError("Can't list volume groups on the nodes")
11834 res = results[node]
11835 res.Raise("Error checking node %s" % node)
11836 if vgname not in res.payload:
11837 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11840 def _CheckDisksExistence(self, nodes):
11841 # Check disk existence
11842 for idx, dev in enumerate(self.instance.disks):
11843 if idx not in self.disks:
11847 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11848 self.cfg.SetDiskID(dev, node)
11850 result = _BlockdevFind(self, node, dev, self.instance)
11852 msg = result.fail_msg
11853 if msg or not result.payload:
11855 msg = "disk not found"
11856 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11859 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11860 for idx, dev in enumerate(self.instance.disks):
11861 if idx not in self.disks:
11864 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11867 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11868 on_primary, ldisk=ldisk):
11869 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11870 " replace disks for instance %s" %
11871 (node_name, self.instance.name))
11873 def _CreateNewStorage(self, node_name):
11874 """Create new storage on the primary or secondary node.
11876 This is only used for same-node replaces, not for changing the
11877 secondary node, hence we don't want to modify the existing disk.
11882 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11883 for idx, dev in enumerate(disks):
11884 if idx not in self.disks:
11887 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11889 self.cfg.SetDiskID(dev, node_name)
11891 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11892 names = _GenerateUniqueNames(self.lu, lv_names)
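# A DRBD8 disk has exactly two LV children: the data volume and a small
# metadata volume (DRBD_META_SIZE); the replacement LVs mirror that layout.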
11894 (data_disk, meta_disk) = dev.children
11895 vg_data = data_disk.logical_id[0]
11896 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11897 logical_id=(vg_data, names[0]),
11898 params=data_disk.params)
11899 vg_meta = meta_disk.logical_id[0]
11900 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11901 size=constants.DRBD_META_SIZE,
11902 logical_id=(vg_meta, names[1]),
11903 params=meta_disk.params)
11905 new_lvs = [lv_data, lv_meta]
11906 old_lvs = [child.Copy() for child in dev.children]
11907 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11908 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11910 # we pass force_create=True to force the LVM creation
11911 for new_lv in new_lvs:
11912 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11913 _GetInstanceInfoText(self.instance), False,
11918 def _CheckDevices(self, node_name, iv_names):
11919 for name, (dev, _, _) in iv_names.iteritems():
11920 self.cfg.SetDiskID(dev, node_name)
11922 result = _BlockdevFind(self, node_name, dev, self.instance)
11924 msg = result.fail_msg
11925 if msg or not result.payload:
11927 msg = "disk not found"
11928 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11931 if result.payload.is_degraded:
11932 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11934 def _RemoveOldStorage(self, node_name, iv_names):
11935 for name, (_, old_lvs, _) in iv_names.iteritems():
11936 self.lu.LogInfo("Remove logical volumes for %s", name)
11939 self.cfg.SetDiskID(lv, node_name)
11941 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11943 self.lu.LogWarning("Can't remove old LV: %s", msg,
11944 hint="remove unused LVs manually")
11946 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11947 """Replace a disk on the primary or secondary for DRBD 8.
11949 The algorithm for replace is quite complicated:
11951 1. for each disk to be replaced:
11953 1. create new LVs on the target node with unique names
11954 1. detach old LVs from the drbd device
11955 1. rename old LVs to name_replaced.<time_t>
11956 1. rename new LVs to old LVs
11957 1. attach the new LVs (with the old names now) to the drbd device
11959 1. wait for sync across all devices
11961 1. for each modified disk:
11963 1. remove old LVs (which have the name name_replaced.<time_t>)
11965 Failures are not very well handled.
11970 # Step: check device activation
11971 self.lu.LogStep(1, steps_total, "Check device existence")
11972 self._CheckDisksExistence([self.other_node, self.target_node])
11973 self._CheckVolumeGroup([self.target_node, self.other_node])
11975 # Step: check other node consistency
11976 self.lu.LogStep(2, steps_total, "Check peer consistency")
11977 self._CheckDisksConsistency(self.other_node,
11978 self.other_node == self.instance.primary_node,
11981 # Step: create new storage
11982 self.lu.LogStep(3, steps_total, "Allocate new storage")
11983 iv_names = self._CreateNewStorage(self.target_node)
11985 # Step: for each lv, detach+rename*2+attach
11986 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11987 for dev, old_lvs, new_lvs in iv_names.itervalues():
11988 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11990 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11992 result.Raise("Can't detach drbd from local storage on node"
11993 " %s for device %s" % (self.target_node, dev.iv_name))
11995 #cfg.Update(instance)
11997 # ok, we created the new LVs, so now we know we have the needed
11998 # storage; as such, we proceed on the target node to rename
11999 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12000 # using the assumption that logical_id == physical_id (which in
12001 # turn is the unique_id on that node)
12003 # FIXME(iustin): use a better name for the replaced LVs
12004 temp_suffix = int(time.time())
12005 ren_fn = lambda d, suff: (d.physical_id[0],
12006 d.physical_id[1] + "_replaced-%s" % suff)
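# Illustrative example (hypothetical names): an LV with physical_id
# ('xenvg', 'abc.disk0_data') would be renamed to
# ('xenvg', 'abc.disk0_data_replaced-1400000000').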
12008 # Build the rename list based on what LVs exist on the node
12009 rename_old_to_new = []
12010 for to_ren in old_lvs:
12011 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12012 if not result.fail_msg and result.payload:
12014 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12016 self.lu.LogInfo("Renaming the old LVs on the target node")
12017 result = self.rpc.call_blockdev_rename(self.target_node,
12019 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12021 # Now we rename the new LVs to the old LVs
12022 self.lu.LogInfo("Renaming the new LVs on the target node")
12023 rename_new_to_old = [(new, old.physical_id)
12024 for old, new in zip(old_lvs, new_lvs)]
12025 result = self.rpc.call_blockdev_rename(self.target_node,
12027 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12029 # Intermediate steps of in memory modifications
12030 for old, new in zip(old_lvs, new_lvs):
12031 new.logical_id = old.logical_id
12032 self.cfg.SetDiskID(new, self.target_node)
12034 # We need to modify old_lvs so that removal later removes the
12035 # right LVs, not the newly added ones; note that old_lvs is a
12037 for disk in old_lvs:
12038 disk.logical_id = ren_fn(disk, temp_suffix)
12039 self.cfg.SetDiskID(disk, self.target_node)
12041 # Now that the new lvs have the old name, we can add them to the device
12042 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12043 result = self.rpc.call_blockdev_addchildren(self.target_node,
12044 (dev, self.instance), new_lvs)
12045 msg = result.fail_msg
12047 for new_lv in new_lvs:
12048 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12051 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12052 hint=("cleanup manually the unused logical"
12054 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12056 cstep = itertools.count(5)
12058 if self.early_release:
12059 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12060 self._RemoveOldStorage(self.target_node, iv_names)
12061 # TODO: Check if releasing locks early still makes sense
12062 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12064 # Release all resource locks except those used by the instance
12065 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12066 keep=self.node_secondary_ip.keys())
12068 # Release all node locks while waiting for sync
12069 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12071 # TODO: Can the instance lock be downgraded here? Take the optional disk
12072 # shutdown in the caller into consideration.
12075 # This can fail as the old devices are degraded and _WaitForSync
12076 # does a combined result over all disks, so we don't check its return value
12077 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12078 _WaitForSync(self.lu, self.instance)
12080 # Check all devices manually
12081 self._CheckDevices(self.instance.primary_node, iv_names)
12083 # Step: remove old storage
12084 if not self.early_release:
12085 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12086 self._RemoveOldStorage(self.target_node, iv_names)
12088 def _ExecDrbd8Secondary(self, feedback_fn):
12089 """Replace the secondary node for DRBD 8.
12091 The algorithm for replace is quite complicated:
12092 - for all disks of the instance:
12093 - create new LVs on the new node with same names
12094 - shutdown the drbd device on the old secondary
12095 - disconnect the drbd network on the primary
12096 - create the drbd device on the new secondary
12097 - network attach the drbd on the primary, using an artifice:
12098 the drbd code for Attach() will connect to the network if it
12099 finds a device which is connected to the good local disks but
12100 not network enabled
12101 - wait for sync across all devices
12102 - remove all disks from the old secondary
12104 Failures are not very well handled.
12109 pnode = self.instance.primary_node
12111 # Step: check device activation
12112 self.lu.LogStep(1, steps_total, "Check device existence")
12113 self._CheckDisksExistence([self.instance.primary_node])
12114 self._CheckVolumeGroup([self.instance.primary_node])
12116 # Step: check other node consistency
12117 self.lu.LogStep(2, steps_total, "Check peer consistency")
12118 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12120 # Step: create new storage
12121 self.lu.LogStep(3, steps_total, "Allocate new storage")
12122 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12123 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12124 for idx, dev in enumerate(disks):
12125 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12126 (self.new_node, idx))
12127 # we pass force_create=True to force LVM creation
12128 for new_lv in dev.children:
12129 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12130 True, _GetInstanceInfoText(self.instance), False,
12133 # Step 4: DRBD minors and DRBD setup changes
12134 # after this, we must manually remove the drbd minors on both the
12135 # error and the success paths
12136 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12137 minors = self.cfg.AllocateDRBDMinor([self.new_node
12138 for dev in self.instance.disks],
12139 self.instance.name)
12140 logging.debug("Allocated minors %r", minors)
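# One new DRBD minor is reserved on the new node for every disk of the
# instance; they must be released again on every error path below.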
12143 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12144 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12145 (self.new_node, idx))
12146 # create new devices on new_node; note that we create two IDs:
12147 # one without port, so the drbd will be activated without
12148 # networking information on the new node at this stage, and one
12149 # with network, for the latter activation in step 4
12150 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12151 if self.instance.primary_node == o_node1:
12154 assert self.instance.primary_node == o_node2, "Three-node instance?"
12157 new_alone_id = (self.instance.primary_node, self.new_node, None,
12158 p_minor, new_minor, o_secret)
12159 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12160 p_minor, new_minor, o_secret)
12162 iv_names[idx] = (dev, dev.children, new_net_id)
12163 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12165 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12166 logical_id=new_alone_id,
12167 children=dev.children,
12170 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12173 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12175 _GetInstanceInfoText(self.instance), False,
12177 except errors.GenericError:
12178 self.cfg.ReleaseDRBDMinors(self.instance.name)
12181 # We have new devices, shutdown the drbd on the old secondary
12182 for idx, dev in enumerate(self.instance.disks):
12183 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12184 self.cfg.SetDiskID(dev, self.target_node)
12185 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12186 (dev, self.instance)).fail_msg
12188 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12189 "node: %s" % (idx, msg),
12190 hint=("Please cleanup this device manually as"
12191 " soon as possible"))
12193 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12194 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12195 self.instance.disks)[pnode]
12197 msg = result.fail_msg
12199 # detaches didn't succeed (unlikely)
12200 self.cfg.ReleaseDRBDMinors(self.instance.name)
12201 raise errors.OpExecError("Can't detach the disks from the network on"
12202 " old node: %s" % (msg,))
12204 # if we managed to detach at least one, we update all the disks of
12205 # the instance to point to the new secondary
12206 self.lu.LogInfo("Updating instance configuration")
12207 for dev, _, new_logical_id in iv_names.itervalues():
12208 dev.logical_id = new_logical_id
12209 self.cfg.SetDiskID(dev, self.instance.primary_node)
12211 self.cfg.Update(self.instance, feedback_fn)
12213 # Release all node locks (the configuration has been updated)
12214 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12216 # and now perform the drbd attach
12217 self.lu.LogInfo("Attaching primary drbds to new secondary"
12218 " (standalone => connected)")
12219 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12221 self.node_secondary_ip,
12222 (self.instance.disks, self.instance),
12223 self.instance.name,
12225 for to_node, to_result in result.items():
12226 msg = to_result.fail_msg
12228 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12230 hint=("please do a gnt-instance info to see the"
12231 " status of disks"))
12233 cstep = itertools.count(5)
12235 if self.early_release:
12236 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12237 self._RemoveOldStorage(self.target_node, iv_names)
12238 # TODO: Check if releasing locks early still makes sense
12239 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12241 # Release all resource locks except those used by the instance
12242 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12243 keep=self.node_secondary_ip.keys())
12245 # TODO: Can the instance lock be downgraded here? Take the optional disk
12246 # shutdown in the caller into consideration.
12249 # This can fail as the old devices are degraded and _WaitForSync
12250 # does a combined result over all disks, so we don't check its return value
12251 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12252 _WaitForSync(self.lu, self.instance)
12254 # Check all devices manually
12255 self._CheckDevices(self.instance.primary_node, iv_names)
12257 # Step: remove old storage
12258 if not self.early_release:
12259 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12260 self._RemoveOldStorage(self.target_node, iv_names)
12263 class LURepairNodeStorage(NoHooksLU):
12264 """Repairs the volume group on a node.
12269 def CheckArguments(self):
12270 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12272 storage_type = self.op.storage_type
12274 if (constants.SO_FIX_CONSISTENCY not in
12275 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12276 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12277 " repaired" % storage_type,
12278 errors.ECODE_INVAL)
12280 def ExpandNames(self):
12281 self.needed_locks = {
12282 locking.LEVEL_NODE: [self.op.node_name],
12285 def _CheckFaultyDisks(self, instance, node_name):
12286 """Ensure faulty disks abort the opcode or at least warn."""
12288 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12290 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12291 " node '%s'" % (instance.name, node_name),
12292 errors.ECODE_STATE)
12293 except errors.OpPrereqError, err:
12294 if self.op.ignore_consistency:
12295 self.LogWarning(str(err.args[0]))
12299 def CheckPrereq(self):
12300 """Check prerequisites.
12303 # Check whether any instance on this node has faulty disks
12304 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12305 if inst.admin_state != constants.ADMINST_UP:
12307 check_nodes = set(inst.all_nodes)
12308 check_nodes.discard(self.op.node_name)
12309 for inst_node_name in check_nodes:
12310 self._CheckFaultyDisks(inst, inst_node_name)
12312 def Exec(self, feedback_fn):
12313 feedback_fn("Repairing storage unit '%s' on %s ..." %
12314 (self.op.name, self.op.node_name))
12316 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12317 result = self.rpc.call_storage_execute(self.op.node_name,
12318 self.op.storage_type, st_args,
12320 constants.SO_FIX_CONSISTENCY)
12321 result.Raise("Failed to repair storage unit '%s' on %s" %
12322 (self.op.name, self.op.node_name))
12325 class LUNodeEvacuate(NoHooksLU):
12326 """Evacuates instances off a list of nodes.
12331 _MODE2IALLOCATOR = {
12332 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12333 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12334 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12336 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12337 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12338 constants.IALLOCATOR_NEVAC_MODES)
12340 def CheckArguments(self):
12341 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12343 def ExpandNames(self):
12344 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12346 if self.op.remote_node is not None:
12347 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12348 assert self.op.remote_node
12350 if self.op.remote_node == self.op.node_name:
12351 raise errors.OpPrereqError("Can not use evacuated node as a new"
12352 " secondary node", errors.ECODE_INVAL)
12354 if self.op.mode != constants.NODE_EVAC_SEC:
12355 raise errors.OpPrereqError("Without the use of an iallocator only"
12356 " secondary instances can be evacuated",
12357 errors.ECODE_INVAL)
12360 self.share_locks = _ShareAll()
12361 self.needed_locks = {
12362 locking.LEVEL_INSTANCE: [],
12363 locking.LEVEL_NODEGROUP: [],
12364 locking.LEVEL_NODE: [],
12367 # Determine nodes (via group) optimistically, needs verification once locks
12368 # have been acquired
12369 self.lock_nodes = self._DetermineNodes()
12371 def _DetermineNodes(self):
12372 """Gets the list of nodes to operate on.
12375 if self.op.remote_node is None:
12376 # Iallocator will choose any node(s) in the same group
12377 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12379 group_nodes = frozenset([self.op.remote_node])
12381 # Determine nodes to be locked
12382 return set([self.op.node_name]) | group_nodes
12384 def _DetermineInstances(self):
12385 """Builds list of instances to operate on.
12388 assert self.op.mode in constants.NODE_EVAC_MODES
12390 if self.op.mode == constants.NODE_EVAC_PRI:
12391 # Primary instances only
12392 inst_fn = _GetNodePrimaryInstances
12393 assert self.op.remote_node is None, \
12394 "Evacuating primary instances requires iallocator"
12395 elif self.op.mode == constants.NODE_EVAC_SEC:
12396 # Secondary instances only
12397 inst_fn = _GetNodeSecondaryInstances
12400 assert self.op.mode == constants.NODE_EVAC_ALL
12401 inst_fn = _GetNodeInstances
12402 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12404 raise errors.OpPrereqError("Due to an issue with the iallocator"
12405 " interface it is not possible to evacuate"
12406 " all instances at once; specify explicitly"
12407 " whether to evacuate primary or secondary"
12409 errors.ECODE_INVAL)
12411 return inst_fn(self.cfg, self.op.node_name)
12413 def DeclareLocks(self, level):
12414 if level == locking.LEVEL_INSTANCE:
12415 # Lock instances optimistically, needs verification once node and group
12416 # locks have been acquired
12417 self.needed_locks[locking.LEVEL_INSTANCE] = \
12418 set(i.name for i in self._DetermineInstances())
12420 elif level == locking.LEVEL_NODEGROUP:
12421 # Lock node groups for all potential target nodes optimistically, needs
12422 # verification once nodes have been acquired
12423 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12424 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12426 elif level == locking.LEVEL_NODE:
12427 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12429 def CheckPrereq(self):
12431 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12432 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12433 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12435 need_nodes = self._DetermineNodes()
12437 if not owned_nodes.issuperset(need_nodes):
12438 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12439 " locks were acquired, current nodes are"
12440 " are '%s', used to be '%s'; retry the"
12442 (self.op.node_name,
12443 utils.CommaJoin(need_nodes),
12444 utils.CommaJoin(owned_nodes)),
12445 errors.ECODE_STATE)
12447 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12448 if owned_groups != wanted_groups:
12449 raise errors.OpExecError("Node groups changed since locks were acquired,"
12450 " current groups are '%s', used to be '%s';"
12451 " retry the operation" %
12452 (utils.CommaJoin(wanted_groups),
12453 utils.CommaJoin(owned_groups)))
12455 # Determine affected instances
12456 self.instances = self._DetermineInstances()
12457 self.instance_names = [i.name for i in self.instances]
12459 if set(self.instance_names) != owned_instances:
12460 raise errors.OpExecError("Instances on node '%s' changed since locks"
12461 " were acquired, current instances are '%s',"
12462 " used to be '%s'; retry the operation" %
12463 (self.op.node_name,
12464 utils.CommaJoin(self.instance_names),
12465 utils.CommaJoin(owned_instances)))
12467 if self.instance_names:
12468 self.LogInfo("Evacuating instances from node '%s': %s",
12470 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12472 self.LogInfo("No instances to evacuate from node '%s'",
12475 if self.op.remote_node is not None:
12476 for i in self.instances:
12477 if i.primary_node == self.op.remote_node:
12478 raise errors.OpPrereqError("Node %s is the primary node of"
12479 " instance %s, cannot use it as"
12481 (self.op.remote_node, i.name),
12482 errors.ECODE_INVAL)
12484 def Exec(self, feedback_fn):
12485 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12487 if not self.instance_names:
12488 # No instances to evacuate
12491 elif self.op.iallocator is not None:
12492 # TODO: Implement relocation to other group
12493 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12494 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12495 instances=list(self.instance_names))
12496 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12498 ial.Run(self.op.iallocator)
12500 if not ial.success:
12501 raise errors.OpPrereqError("Can't compute node evacuation using"
12502 " iallocator '%s': %s" %
12503 (self.op.iallocator, ial.info),
12504 errors.ECODE_NORES)
12506 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12508 elif self.op.remote_node is not None:
12509 assert self.op.mode == constants.NODE_EVAC_SEC
12511 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12512 remote_node=self.op.remote_node,
12514 mode=constants.REPLACE_DISK_CHG,
12515 early_release=self.op.early_release)]
12516 for instance_name in self.instance_names]
12519 raise errors.ProgrammerError("No iallocator or remote node")
12521 return ResultWithJobs(jobs)
12524 def _SetOpEarlyRelease(early_release, op):
12525 """Sets C{early_release} flag on opcodes if available.
12529 op.early_release = early_release
12530 except AttributeError:
12531 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12536 def _NodeEvacDest(use_nodes, group, nodes):
12537 """Returns group or nodes depending on caller's choice.
12541 return utils.CommaJoin(nodes)
12546 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12547 """Unpacks the result of change-group and node-evacuate iallocator requests.
12549 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12550 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12552 @type lu: L{LogicalUnit}
12553 @param lu: Logical unit instance
12554 @type alloc_result: tuple/list
12555 @param alloc_result: Result from iallocator
12556 @type early_release: bool
12557 @param early_release: Whether to release locks early if possible
12558 @type use_nodes: bool
12559 @param use_nodes: Whether to display node names instead of groups
12562 (moved, failed, jobs) = alloc_result
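# Expected shapes (as consumed below): moved is a list of
# (instance_name, group, [node, ...]) tuples, failed a list of
# (instance_name, reason) pairs and jobs a list of lists of serialized
# opcodes.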
12565 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12566 for (name, reason) in failed)
12567 lu.LogWarning("Unable to evacuate instances %s", failreason)
12568 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12571 lu.LogInfo("Instances to be moved: %s",
12572 utils.CommaJoin("%s (to %s)" %
12573 (name, _NodeEvacDest(use_nodes, group, nodes))
12574 for (name, group, nodes) in moved))
12576 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12577 map(opcodes.OpCode.LoadOpCode, ops))
12581 def _DiskSizeInBytesToMebibytes(lu, size):
12582 """Converts a disk size in bytes to mebibytes.
12584 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12587 (mib, remainder) = divmod(size, 1024 * 1024)
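# Worked example: size = 1048577 (1 MiB plus one byte) gives
# divmod(1048577, 1048576) == (1, 1); the non-zero remainder triggers the
# warning below and the size is rounded up to 2 MiB.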
12590 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12591 " to not overwrite existing data (%s bytes will not be"
12592 " wiped)", (1024 * 1024) - remainder)
12598 class LUInstanceGrowDisk(LogicalUnit):
12599 """Grow a disk of an instance.
12602 HPATH = "disk-grow"
12603 HTYPE = constants.HTYPE_INSTANCE
12606 def ExpandNames(self):
12607 self._ExpandAndLockInstance()
12608 self.needed_locks[locking.LEVEL_NODE] = []
12609 self.needed_locks[locking.LEVEL_NODE_RES] = []
12610 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12611 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12613 def DeclareLocks(self, level):
12614 if level == locking.LEVEL_NODE:
12615 self._LockInstancesNodes()
12616 elif level == locking.LEVEL_NODE_RES:
12618 self.needed_locks[locking.LEVEL_NODE_RES] = \
12619 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12621 def BuildHooksEnv(self):
12622 """Build hooks env.
12624 This runs on the master, the primary and all the secondaries.
12628 "DISK": self.op.disk,
12629 "AMOUNT": self.op.amount,
12630 "ABSOLUTE": self.op.absolute,
12632 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12635 def BuildHooksNodes(self):
12636 """Build hooks nodes.
12639 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12642 def CheckPrereq(self):
12643 """Check prerequisites.
12645 This checks that the instance is in the cluster.
12648 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12649 assert instance is not None, \
12650 "Cannot retrieve locked instance %s" % self.op.instance_name
12651 nodenames = list(instance.all_nodes)
12652 for node in nodenames:
12653 _CheckNodeOnline(self, node)
12655 self.instance = instance
12657 if instance.disk_template not in constants.DTS_GROWABLE:
12658 raise errors.OpPrereqError("Instance's disk layout does not support"
12659 " growing", errors.ECODE_INVAL)
12661 self.disk = instance.FindDisk(self.op.disk)
12663 if self.op.absolute:
12664 self.target = self.op.amount
12665 self.delta = self.target - self.disk.size
12667 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12668 "current disk size (%s)" %
12669 (utils.FormatUnit(self.target, "h"),
12670 utils.FormatUnit(self.disk.size, "h")),
12671 errors.ECODE_STATE)
12673 self.delta = self.op.amount
12674 self.target = self.disk.size + self.delta
12676 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12677 utils.FormatUnit(self.delta, "h"),
12678 errors.ECODE_INVAL)
12680 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12682 def _CheckDiskSpace(self, nodenames, req_vgspace):
12683 template = self.instance.disk_template
12684 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12685 # TODO: check the free disk space for file, when that feature will be
12687 nodes = map(self.cfg.GetNodeInfo, nodenames)
12688 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12691 # With exclusive storage we need something smarter than just looking
12692 # at free space; for now, let's simply abort the operation.
12693 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12694 " is enabled", errors.ECODE_STATE)
12695 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12697 def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
12714 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12715 (self.op.disk, instance.name,
12716 utils.FormatUnit(self.delta, "h"),
12717 utils.FormatUnit(self.target, "h")))
12719 # First run all grow ops in dry-run mode
12720 for node in instance.all_nodes:
12721 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)
    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None
12746 # We know that (as far as we can test) operations across different
12747 # nodes will succeed, time to run it for real on the backing storage
12748 for node in instance.all_nodes:
12749 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)
12754 # And now execute it for logical storage, on the primary node
12755 node = instance.primary_node
12756 self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)
12761 disk.RecordGrow(self.delta)
12762 self.cfg.Update(instance, feedback_fn)
12764 # Changes have been recorded, release node lock
12765 _ReleaseLocks(self, locking.LEVEL_NODE)
12767 # Downgrade lock while waiting for sync
12768 self.glm.downgrade(locking.LEVEL_INSTANCE)
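    # Downgrading (rather than releasing) the instance lock still blocks
    # concurrent modifications but lets read-only operations proceed during
    # the potentially long wipe and resync phase below.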
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")
12791 assert self.owned_locks(locking.LEVEL_NODE_RES)
12792 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12795 class LUInstanceQueryData(NoHooksLU):
12796 """Query runtime instance data.
12801 def ExpandNames(self):
12802 self.needed_locks = {}
12804 # Use locking if requested or when non-static information is wanted
12805 if not (self.op.static or self.op.use_locking):
12806 self.LogWarning("Non-static data requested, locks need to be acquired")
12807 self.op.use_locking = True
    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12828 def DeclareLocks(self, level):
12829 if self.op.use_locking:
12830 if level == locking.LEVEL_NODEGROUP:
12831 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12833 # Lock all groups used by instances optimistically; this requires going
12834 # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
12841 elif level == locking.LEVEL_NODE:
12842 self._LockInstancesNodes()
12844 def CheckPrereq(self):
12845 """Check prerequisites.
12847 This only checks the optional instance list against the existing names.
12850 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12851 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12852 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12854 if self.wanted_names is None:
12855 assert self.op.use_locking, "Locking was not used"
12856 self.wanted_names = owned_instances
12858 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()
  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None
    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
12891 def _ComputeDiskStatus(self, instance, snode, dev):
12892 """Compute block device status.
12895 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12897 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12899 def _ComputeDiskStatusInner(self, instance, snode, dev):
12900 """Compute block device status.
12902 @attention: The device has to be annotated already.
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }
12935 def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()
12941 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12942 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12944 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12945 for node in nodes.values()))
12947 group2name_fn = lambda uuid: groups[uuid].name
12949 for instance in self.wanted_instances:
12950 pnode = nodes[instance.primary_node]
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
12975 snodes_group_uuids = [nodes[snode_name].group
12976 for snode_name in instance.secondary_nodes]
12978 result[instance.name] = {
12979 "name": instance.name,
12980 "config_state": instance.admin_state,
12981 "run_state": remote_state,
12982 "pnode": instance.primary_node,
12983 "pnode_group_uuid": pnode.group,
12984 "pnode_group_name": group2name_fn(pnode.group),
12985 "snodes": instance.secondary_nodes,
12986 "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
12994 "network_port": instance.network_port,
12995 "hv_instance": instance.hvparams,
12996 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12997 "be_instance": instance.beparams,
12998 "be_actual": cluster.FillBE(instance),
12999 "os_instance": instance.osparams,
13000 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
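# Illustrative example (hypothetical values): with
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# PrepareContainerMods(mods, None) returns the same tuples with None appended
# as the private field, while LUInstanceSetParams passes _InstNicModPrivate
# for NIC modifications so every entry gets its own private object.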
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
13038 def ApplyContainerMods(kind, container, chgdesc, mods,
13039 create_fn, modify_fn, remove_fn):
13040 """Applies descriptions in C{mods} to C{container}.
13043 @param kind: One-word item description
13044 @type container: list
13045 @param container: Container to modify
13046 @type chgdesc: None or list
13047 @param chgdesc: List of applied changes
13049 @param mods: Modifications as returned by L{PrepareContainerMods}
13050 @type create_fn: callable
13051 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13052 receives absolute item index, parameters and private data object as added
13053 by L{PrepareContainerMods}, returns tuple containing new item and changes
13055 @type modify_fn: callable
13056 @param modify_fn: Callback for modifying an existing item
13057 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13058 and private data object as added by L{PrepareContainerMods}, returns
13060 @type remove_fn: callable
13061 @param remove_fn: Callback on removing item; receives absolute item index,
13062 item and private data object as added by L{PrepareContainerMods}
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
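# Minimal usage sketch (callback names are placeholders, mirroring how
# LUInstanceSetParams drives this helper): given container = instance.disks,
# chgdesc = [] and mods = PrepareContainerMods(op.disks, None), calling
# ApplyContainerMods("disk", container, chgdesc, mods, create_fn, modify_fn,
# remove_fn) mutates the container in place and extends chgdesc with entries
# such as ("disk/0", "remove") or whatever the callbacks return.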
13127 def _UpdateIvNames(base_index, disks):
13128 """Updates the C{iv_name} attribute of disks.
13130 @type disks: list of L{objects.Disk}
13133 for (idx, disk) in enumerate(disks):
13134 disk.iv_name = "disk/%s" % (base_index + idx, )
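  # Example: _UpdateIvNames(0, instance.disks) renames the volumes to
  # "disk/0", "disk/1", ...; a non-zero base_index simply shifts the numbering.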
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []
      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
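  # Illustrative example (hypothetical values): the legacy two-element format
  # [(constants.DDM_ADD, {"size": 1024})] is upgraded to
  # [(constants.DDM_ADD, -1, {"size": 1024})], while a positional entry such as
  # [(0, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 0, {"mode": "ro"})].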
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
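  # Illustrative example (hypothetical values): a DDM_ADD entry of
  # {constants.IDISK_SIZE: "2048"} passes validation with the size normalized
  # to the integer 2048 and the mode defaulted to DISK_RDWR, whereas a
  # DDM_MODIFY entry containing IDISK_SIZE is rejected in favour of grow-disk.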
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given"
                                     " mode or link should not",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
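  # Illustrative example (hypothetical values): adding a NIC with
  # {constants.INIC_IP: constants.NIC_IP_POOL, constants.INIC_NETWORK: "net1"}
  # is accepted here (the MAC defaults to constants.VALUE_AUTO and the actual
  # address is allocated from the pool later), while ip=pool without a network
  # is refused.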
13291 def CheckArguments(self):
13292 if not (self.op.nics or self.op.disks or self.op.disk_template or
13293 self.op.hvparams or self.op.beparams or self.op.os_name or
13294 self.op.offline is not None or self.op.runtime_mem):
13295 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13297 if self.op.hvparams:
13298 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13299 "hypervisor", "instance", "cluster")
13301 self.op.disks = self._UpgradeDiskNicMods(
13302 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13303 self.op.nics = self._UpgradeDiskNicMods(
13304 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13306 if self.op.disks and self.op.disk_template is not None:
13307 raise errors.OpPrereqError("Disk template conversion and other disk"
13308 " changes not supported at the same time",
13309 errors.ECODE_INVAL)
13311 if (self.op.disk_template and
13312 self.op.disk_template in constants.DTS_INT_MIRROR and
13313 self.op.remote_node is None):
13314 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13315 " one requires specifying a secondary node",
13316 errors.ECODE_INVAL)
13318 # Check NIC modifications
13319 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13320 self._VerifyNicModification)
13322 def ExpandNames(self):
13323 self._ExpandAndLockInstance()
13324 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13325 # Can't even acquire node locks in shared mode as upcoming changes in
13326 # Ganeti 2.6 will start to modify the node object on disk conversion
13327 self.needed_locks[locking.LEVEL_NODE] = []
13328 self.needed_locks[locking.LEVEL_NODE_RES] = []
13329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock node group to be able to look up the ipolicy
13331 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13333 def DeclareLocks(self, level):
13334 if level == locking.LEVEL_NODEGROUP:
13335 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13336 # Acquire locks for the instance's nodegroups optimistically. Needs
13337 # to be verified in CheckPrereq
13338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13339 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13340 elif level == locking.LEVEL_NODE:
13341 self._LockInstancesNodes()
13342 if self.op.disk_template and self.op.remote_node:
13343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13344 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13345 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13347 self.needed_locks[locking.LEVEL_NODE_RES] = \
13348 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13350 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
13358 args["minmem"] = self.be_new[constants.BE_MINMEM]
13359 if constants.BE_MAXMEM in self.be_new:
13360 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13361 if constants.BE_VCPUS in self.be_new:
13362 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13363 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13364 # information at all.
    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
13370 n = copy.deepcopy(nic)
13371 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13372 n.nicparams = nicparams
13373 nics.append(_NICToTuple(self, n))
13375 args["nics"] = nics
13377 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13378 if self.op.disk_template:
13379 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13380 if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
13392 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13393 old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
13400 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13402 new_net_uuid = None
13403 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13404 if new_net_uuid_or_name:
13405 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13406 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
    if old_net_uuid:
      old_net_obj = self.cfg.GetNetwork(old_net_uuid)

    if new_net_uuid:
      netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
      if not netparams:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" %
                                   new_net_obj.name, errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)
13421 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13423 new_filled_params = cluster.SimpleFillNIC(new_params)
13424 objects.NIC.CheckParameterSyntax(new_filled_params)
13426 new_mode = new_filled_params[constants.NIC_MODE]
13427 if new_mode == constants.NIC_MODE_BRIDGED:
13428 bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13437 elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)
13443 elif new_mode == constants.NIC_MODE_OVS:
13444 # TODO: check OVS link
13445 self.LogInfo("OVS links are currently not checked for correctness")
13447 if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
13460 except errors.ReservationError:
13461 raise errors.OpPrereqError("MAC address '%s' already in use"
13462 " in cluster" % mac,
13463 errors.ECODE_NOTUNIQUE)
    elif new_net_uuid != old_net_uuid:

      def get_net_prefix(net_uuid):
        mac_prefix = None
        if net_uuid:
          nobj = self.cfg.GetNetwork(net_uuid)
          mac_prefix = nobj.mac_prefix

        return mac_prefix

      new_prefix = get_net_prefix(new_net_uuid)
      old_prefix = get_net_prefix(old_net_uuid)
13476 if old_prefix != new_prefix:
13477 params[constants.INIC_MAC] = \
13478 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13480 #if there is a change in nic's ip/network configuration
13481 new_ip = params.get(constants.INIC_IP, old_ip)
13482 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13484 if new_ip.lower() == constants.NIC_IP_POOL:
13485 if not new_net_uuid:
13486 raise errors.OpPrereqError("ip=pool, but no network found",
13487 errors.ECODE_INVAL)
13489 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13490 except errors.ReservationError:
13491 raise errors.OpPrereqError("Unable to get a free IP"
13492 " from the address pool",
13493 errors.ECODE_STATE)
13494 self.LogInfo("Chose IP %s from network %s", new_ip, new_net_obj.name)
13495 params[constants.INIC_IP] = new_ip
13496 elif new_ip != old_ip or new_net_uuid != old_net_uuid:
13498 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13499 self.LogInfo("Reserving IP %s in network %s",
13500 new_ip, new_net_obj.name)
13501 except errors.ReservationError:
13502 raise errors.OpPrereqError("IP %s not available in network %s" %
13503 (new_ip, new_net_obj.name),
13504 errors.ECODE_NOTUNIQUE)
13507 elif not new_net_uuid and self.op.conflicts_check:
13508 _CheckForConflictingIp(self, new_ip, pnode)
      if old_ip and old_net_uuid:
        try:
          self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net_obj.name)
13517 # there are no changes in (net, ip) tuple
13518 elif (old_net_uuid is not None and
13519 (req_link is not None or req_mode is not None)):
13520 raise errors.OpPrereqError("Not allowed to change link or mode of"
13521 " a NIC that is connected to a network",
13522 errors.ECODE_INVAL)
13524 private.params = new_params
13525 private.filled = new_filled_params
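    # The validated parameters are stashed on the per-modification private
    # object so that the Exec-phase callbacks (_CreateNewNic and
    # _ApplyNicMods) can apply them without recomputing the checks above.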
13527 def _PreCheckDiskTemplate(self, pnode_info):
13528 """CheckPrereq checks related to a new disk template."""
13529 # Arguments are passed to avoid configuration lookups
13530 instance = self.instance
13531 pnode = instance.primary_node
13532 cluster = self.cluster
13533 if instance.disk_template == self.op.disk_template:
13534 raise errors.OpPrereqError("Instance already has disk template %s" %
13535 instance.disk_template, errors.ECODE_INVAL)
13537 if (instance.disk_template,
13538 self.op.disk_template) not in self._DISK_CONVERSIONS:
13539 raise errors.OpPrereqError("Unsupported disk template conversion from"
13540 " %s to %s" % (instance.disk_template,
13541 self.op.disk_template),
13542 errors.ECODE_INVAL)
13543 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13544 msg="cannot change disk template")
13545 if self.op.disk_template in constants.DTS_INT_MIRROR:
13546 if self.op.remote_node == pnode:
13547 raise errors.OpPrereqError("Given new secondary node %s is the same"
13548 " as the primary node of the instance" %
13549 self.op.remote_node, errors.ECODE_STATE)
13550 _CheckNodeOnline(self, self.op.remote_node)
13551 _CheckNodeNotDrained(self, self.op.remote_node)
13552 # FIXME: here we assume that the old instance type is DT_PLAIN
13553 assert instance.disk_template == constants.DT_PLAIN
13554 disks = [{constants.IDISK_SIZE: d.size,
13555 constants.IDISK_VG: d.logical_id[0]}
13556 for d in instance.disks]
13557 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13558 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13560 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13561 snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
13565 ignore=self.op.ignore_ipolicy)
13566 if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13573 # Make sure none of the nodes require exclusive storage
13574 nodes = [pnode_info]
13575 if self.op.disk_template in constants.DTS_INT_MIRROR:
13577 nodes.append(snode_info)
13578 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13579 if compat.any(map(has_es, nodes)):
13580 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13581 " storage is enabled" % (instance.disk_template,
13582 self.op.disk_template))
13583 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13585 def CheckPrereq(self):
13586 """Check prerequisites.
13588 This only checks the instance list against the existing names.
13591 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13592 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13594 cluster = self.cluster = self.cfg.GetClusterInfo()
13595 assert self.instance is not None, \
13596 "Cannot retrieve locked instance %s" % self.op.instance_name
13598 pnode = instance.primary_node
13599 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13600 nodelist = list(instance.all_nodes)
13601 pnode_info = self.cfg.GetNodeInfo(pnode)
13602 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13604 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13605 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13606 group_info = self.cfg.GetNodeGroup(pnode_info.group)
    # dictionary with instance information after the modification
    ispec = {}
13611 # Check disk modifications. This is done here and not in CheckArguments
13612 # (as with NICs), because we need to know the instance's disk template
13613 if instance.disk_template == constants.DT_EXT:
13614 self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)
13620 # Prepare disk/NIC modifications
13621 self.diskmod = PrepareContainerMods(self.op.disks, None)
13622 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
    # Check the validity of the `provider' parameter
    if instance.disk_template in constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and parameter"
                                       " '%s' missing, during disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)
13652 if self.op.os_name and not self.op.force:
13653 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13654 self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os
13659 assert not (self.op.disk_template and self.op.disks), \
13660 "Can't modify disk template and apply disk changes at the same time"
13662 if self.op.disk_template:
13663 self._PreCheckDiskTemplate(pnode_info)
13665 # hvparams processing
13666 if self.op.hvparams:
13667 hv_type = instance.hypervisor
13668 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13669 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13670 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13673 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13674 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13675 self.hv_proposed = self.hv_new = hv_new # the new actual values
13676 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
13682 # beparams processing
13683 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
13687 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13688 be_new = cluster.SimpleFillBE(i_bedict)
13689 self.be_proposed = self.be_new = be_new # the new actual values
13690 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
13693 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13694 be_old = cluster.FillBE(instance)
13696 # CPU param validation -- checking every time a parameter is
13697 # changed to cover all cases where either CPU mask or vcpus have
13699 if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13703 # Verify mask is consistent with number of vCPUs. Can skip this
13704 # test if only 1 entry in the CPU mask, which means same mask
13705 # is applied to all vCPUs.
13706 if (len(cpu_list) > 1 and
13707 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)
13714 # Only perform this test if a new CPU mask is given
13715 if constants.HV_CPU_MASK in self.hv_new:
13716 # Calculate the largest CPU number requested
13717 max_requested_cpu = max(map(max, cpu_list))
13718 # Check that all of the instance's nodes have enough physical CPUs to
13719 # satisfy the requested CPU mask
13720 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13721 max_requested_cpu + 1, instance.hypervisor)
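      # Illustrative example (hypothetical values): a mask of "0-1:2-3:4" pins
      # three vCPUs individually and therefore requires BE_VCPUS == 3; a
      # single-entry mask such as "0-3" applies to every vCPU and skips the
      # count check, while the physical-CPU check above still requires CPUs
      # 0-3 to exist on all of the instance's nodes.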
13723 # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []
13733 #TODO(dynmem): do the appropriate check involving MINMEM
13734 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13735 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13736 mem_check_list = [pnode]
13737 if be_new[constants.BE_AUTO_BALANCE]:
13738 # either we changed auto_balance to yes or it was from before
13739 mem_check_list.extend(instance.secondary_nodes)
13740 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13741 instance.hypervisor)
13742 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13743 [instance.hypervisor], False)
13744 pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)
13776 if be_new[constants.BE_AUTO_BALANCE]:
13777 for node, nres in nodeinfo.items():
        if node not in instance.secondary_nodes:
          continue
        nres.Raise("Can't get info from secondary node %s" % node,
13781 prereq=True, ecode=errors.ECODE_STATE)
13782 (_, _, (nhvinfo, )) = nres.payload
13783 if not isinstance(nhvinfo.get("memory_free", None), int):
13784 raise errors.OpPrereqError("Secondary node %s didn't return free"
13785 " memory information" % node,
13786 errors.ECODE_STATE)
13787 #TODO(dynmem): do the appropriate check involving MINMEM
13788 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13789 raise errors.OpPrereqError("This change will prevent the instance"
13790 " from failover to its secondary node"
13791 " %s, due to not enough memory" % node,
13792 errors.ECODE_STATE)
13794 if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
13798 remote_info.Raise("Error checking node %s" % instance.primary_node)
13799 if not remote_info.payload: # not running already
13800 raise errors.OpPrereqError("Instance %s is not running" %
13801 instance.name, errors.ECODE_STATE)
13803 current_memory = remote_info.payload["memory"]
13804 if (not self.op.force and
13805 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13806 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13807 raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " specified" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
13812 self.be_proposed[constants.BE_MAXMEM]),
13813 errors.ECODE_INVAL)
      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
13818 "ballooning memory for instance %s" %
13819 instance.name, delta, instance.hypervisor)
13821 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13822 raise errors.OpPrereqError("Disk operations not supported for"
13823 " diskless instances", errors.ECODE_INVAL)
13825 def _PrepareNicCreate(_, params, private):
13826 self._PrepareNicModification(params, private, None, None,
13827 {}, cluster, pnode)
13828 return (None, None)
13830 def _PrepareNicMod(_, nic, params, private):
13831 self._PrepareNicModification(params, private, nic.ip, nic.network,
13832 nic.nicparams, cluster, pnode)
    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
13838 if net is not None and ip is not None:
13839 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13841 # Verify NIC changes (operating on copy)
13842 nics = instance.nics[:]
13843 ApplyContainerMods("NIC", nics, None, self.nicmod,
13844 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13845 if len(nics) > constants.MAX_NICS:
13846 raise errors.OpPrereqError("Instance has too many network interfaces"
13847 " (%d), cannot add more" % constants.MAX_NICS,
13848 errors.ECODE_STATE)
13850 # Verify disk changes (operating on a copy)
13851 disks = instance.disks[:]
13852 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13853 if len(disks) > constants.MAX_DISKS:
13854 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13855 " more" % constants.MAX_DISKS,
13856 errors.ECODE_STATE)
13857 disk_sizes = [disk.size for disk in instance.disks]
13858 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13859 self.diskmod if op == constants.DDM_ADD)
13860 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13861 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13863 if self.op.offline is not None and self.op.offline:
13864 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13865 msg="can't change to offline")
13867 # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13880 if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
13885 ispec[constants.ISPEC_SPINDLE_USE] = \
13886 self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
13891 ispec_max = ispec.copy()
13892 ispec_max[constants.ISPEC_MEM_SIZE] = \
13893 self.be_new.get(constants.BE_MAXMEM, None)
13894 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13895 ispec_min = ispec.copy()
13896 ispec_min[constants.ISPEC_MEM_SIZE] = \
13897 self.be_new.get(constants.BE_MINMEM, None)
13898 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13900 if (res_max or res_min):
13901 # FIXME: Improve error message by including information about whether
13902 # the upper or lower limit of the parameter fails the ipolicy.
13903 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13904 (group_info, group_info.name,
13905 utils.CommaJoin(set(res_max + res_min))))
13906 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13908 def _ConvertPlainToDrbd(self, feedback_fn):
13909 """Converts an instance from plain to drbd.
13912 feedback_fn("Converting template to drbd")
13913 instance = self.instance
13914 pnode = instance.primary_node
13915 snode = self.op.remote_node
13917 assert instance.disk_template == constants.DT_PLAIN
13919 # create a fake disk info for _GenerateDiskTemplate
13920 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13921 constants.IDISK_VG: d.logical_id[0]}
13922 for d in instance.disks]
13923 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13924 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
13929 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13930 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13931 info = _GetInstanceInfoText(instance)
13932 feedback_fn("Creating additional volumes...")
13933 # first, create the missing data and meta devices
13934 for disk in anno_disks:
13935 # unfortunately this is... not too nice
13936 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13937 info, True, p_excl_stor)
13938 for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
13944 rename_list = [(o, n.children[0].logical_id)
13945 for (o, n) in zip(instance.disks, new_disks)]
13946 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13947 result.Raise("Failed to rename original LVs")
13949 feedback_fn("Initializing DRBD devices...")
13950 # all child devices are in place, we can now create the DRBD devices
13951 for disk in anno_disks:
13952 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13953 f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)
13957 # at this point, the instance has been modified
13958 instance.disk_template = constants.DT_DRBD8
13959 instance.disks = new_disks
13960 self.cfg.Update(instance, feedback_fn)
13962 # Release node locks while waiting for sync
13963 _ReleaseLocks(self, locking.LEVEL_NODE)
13965 # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
13972 # Node resource locks will be released by caller
13974 def _ConvertDrbdToPlain(self, feedback_fn):
13975 """Converts an instance from drbd to plain.
13978 instance = self.instance
13980 assert len(instance.secondary_nodes) == 1
13981 assert instance.disk_template == constants.DT_DRBD8
13983 pnode = instance.primary_node
13984 snode = instance.secondary_nodes[0]
13985 feedback_fn("Converting template to plain")
13987 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13988 new_disks = [d.children[0] for d in instance.disks]
13990 # copy over size and mode
13991 for parent, child in zip(old_disks, new_disks):
13992 child.size = parent.size
13993 child.mode = parent.mode
13995 # this is a DRBD disk, return its port to the pool
13996 # NOTE: this must be done right before the call to cfg.Update!
13997 for disk in old_disks:
13998 tcp_port = disk.logical_id[2]
13999 self.cfg.AddTcpUdpPort(tcp_port)
14001 # update instance structure
14002 instance.disks = new_disks
14003 instance.disk_template = constants.DT_PLAIN
14004 self.cfg.Update(instance, feedback_fn)
14006 # Release locks in case removing disks takes a while
14007 _ReleaseLocks(self, locking.LEVEL_NODE)
14009 feedback_fn("Removing volumes on the secondary node...")
14010 for disk in old_disks:
14011 self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)
14017 feedback_fn("Removing unneeded volumes on the primary node...")
14018 for idx, disk in enumerate(old_disks):
14019 meta = disk.children[1]
14020 self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
14026 def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])
  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]
    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)
14086 # if this is a DRBD disk, return its port to the pool
14087 if root.dev_type in constants.LDS_DRBD:
14088 self.cfg.AddTcpUdpPort(root.logical_id[2])
14091 def _CreateNewNic(idx, params, private):
14092 """Creates data structure for a new network interface.
14095 mac = params[constants.INIC_MAC]
14096 ip = params.get(constants.INIC_IP, None)
14097 net = params.get(constants.INIC_NETWORK, None)
14098 #TODO: not private.filled?? can a nic have no nicparams??
14099 nicparams = private.filled
    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
       private.filled[constants.NIC_LINK],
       net)),
      ])
  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled
      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes
14129 def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
14135 # Process here the warnings from CheckPrereq, as we don't have a
14136 # feedback_fn there.
14137 # TODO: Replace with self.LogWarning
14138 for warn in self.warn:
14139 feedback_fn("WARNING: %s" % warn)
14141 assert ((self.op.disk_template is None) ^
14142 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14143 "Not owning any node resource locks"
    result = []
    instance = self.instance
14149 if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
14153 rpcres.Raise("Cannot modify instance runtime memory")
14154 result.append(("runtime_memory", self.op.runtime_mem))
14156 # Apply disk changes
14157 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14158 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14159 _UpdateIvNames(0, instance.disks)
14161 if self.op.disk_template:
14163 check_nodes = set(instance.all_nodes)
14164 if self.op.remote_node:
14165 check_nodes.add(self.op.remote_node)
14166 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14167 owned = self.owned_locks(level)
14168 assert not (check_nodes - owned), \
14169 ("Not owning the correct locks, owning %r, expected at least %r" %
14170 (owned, check_nodes))
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
14184 assert instance.disk_template == self.op.disk_template, \
14185 ("Expected disk template '%s', found '%s'" %
14186 (self.op.disk_template, instance.disk_template))
14188 # Release node and resource locks if there are any (they might already have
14189 # been released during disk conversion)
14190 _ReleaseLocks(self, locking.LEVEL_NODE)
14191 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14193 # Apply NIC changes
14194 if self._new_nics is not None:
14195 instance.nics = self._new_nics
14196 result.extend(self._nic_chgdesc)
14199 if self.op.hvparams:
14200 instance.hvparams = self.hv_inst
14201 for key, val in self.op.hvparams.iteritems():
14202 result.append(("hv/%s" % key, val))
14205 if self.op.beparams:
14206 instance.beparams = self.be_inst
14207 for key, val in self.op.beparams.iteritems():
14208 result.append(("be/%s" % key, val))
14211 if self.op.os_name:
14212 instance.os = self.op.os_name
14215 if self.op.osparams:
14216 instance.osparams = self.os_inst
14217 for key, val in self.op.osparams.iteritems():
14218 result.append(("os/%s" % key, val))
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
14224 # Mark instance as offline
14225 self.cfg.MarkInstanceOffline(instance.name)
14226 result.append(("admin_state", constants.ADMINST_OFFLINE))
14228 # Mark instance as online, but stopped
14229 self.cfg.MarkInstanceDown(instance.name)
14230 result.append(("admin_state", constants.ADMINST_DOWN))
14232 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14234 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14235 self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
14240 _DISK_CONVERSIONS = {
14241 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
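  # Only the plain <-> drbd8 conversions listed above are supported; this
  # mapping must stay in sync with _PreCheckDiskTemplate, which rejects any
  # (old, new) template pair that is not a key of this dictionary.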
14246 class LUInstanceChangeGroup(LogicalUnit):
14247 HPATH = "instance-change-group"
14248 HTYPE = constants.HTYPE_INSTANCE
14251 def ExpandNames(self):
14252 self.share_locks = _ShareAll()
14254 self.needed_locks = {
14255 locking.LEVEL_NODEGROUP: [],
14256 locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None
14268 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14270 def DeclareLocks(self, level):
14271 if level == locking.LEVEL_NODEGROUP:
14272 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14274 if self.req_target_uuids:
14275 lock_groups = set(self.req_target_uuids)
14277 # Lock all groups used by instance optimistically; this requires going
14278 # via the node before it's locked, requiring verification later on
14279 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
14285 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14287 elif level == locking.LEVEL_NODE:
14288 if self.req_target_uuids:
14289 # Lock all nodes used by instances
14290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14291 self._LockInstancesNodes()
14293 # Lock all nodes in all potential target groups
14294 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14295 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14296 member_nodes = [node_name
14297 for group in lock_groups
14298 for node_name in self.cfg.GetNodeGroup(group).members]
14299 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14304 def CheckPrereq(self):
14305 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14306 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14307 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14309 assert (self.req_target_uuids is None or
14310 owned_groups.issuperset(self.req_target_uuids))
14311 assert owned_instances == set([self.op.instance_name])
14313 # Get instance information
14314 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14316 # Check if node groups for locked instance are still correct
14317 assert owned_nodes.issuperset(self.instance.all_nodes), \
14318 ("Instance %s's nodes changed while we kept the lock" %
14319 self.op.instance_name)
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)
14324 if self.req_target_uuids:
14325 # User requested specific target groups
14326 self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups
14331 conflicting_groups = self.target_uuids & inst_groups
14332 if conflicting_groups:
14333 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14334 " used by the instance '%s'" %
14335 (utils.CommaJoin(conflicting_groups),
14336 self.op.instance_name),
14337 errors.ECODE_INVAL)
14339 if not self.target_uuids:
14340 raise errors.OpPrereqError("There are no possible target groups",
14341 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
14357 def BuildHooksNodes(self):
14358 """Build hooks nodes.
14361 mn = self.cfg.GetMasterNode()
14362 return ([mn], [mn])
14364 def Exec(self, feedback_fn):
14365 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14367 assert instances == [self.op.instance_name], "Instance not locked"
14369 req = iallocator.IAReqGroupChange(instances=instances,
14370 target_groups=list(self.target_uuids))
14371 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14373 ial.Run(self.op.iallocator)
14375 if not ial.success:
14376 raise errors.OpPrereqError("Can't compute solution for changing group of"
14377 " instance '%s' using iallocator '%s': %s" %
14378 (self.op.instance_name, self.op.iallocator,
14379 ial.info), errors.ECODE_NORES)
14381 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14383 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14384 " instance '%s'", len(jobs), self.op.instance_name)
14386 return ResultWithJobs(jobs)
14389 class LUBackupQuery(NoHooksLU):
14390 """Query the exports list
14395 def CheckArguments(self):
14396 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14397 ["node", "export"], self.op.use_locking)
14399 def ExpandNames(self):
14400 self.expq.ExpandNames(self)
14402 def DeclareLocks(self, level):
14403 self.expq.DeclareLocks(self, level)
14405 def Exec(self, feedback_fn):
14408 for (node, expname) in self.expq.OldStyleQuery(self):
14409 if expname is None:
14410 result[node] = False
14412 result.setdefault(node, []).append(expname)
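# Illustrative shape of the old-style result built above (added; node and
# export names are hypothetical): a dict mapping each node name either to
# False (the export list could not be retrieved from that node) or to the
# list of export names found on it, e.g.
#   {"node1.example.com": ["inst1.export"], "node2.example.com": False}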
14417 class _ExportQuery(_QueryBase):
14418 FIELDS = query.EXPORT_FIELDS
14420 #: The node name is not a unique key for this query
14421 SORT_FIELD = "node"
14423 def ExpandNames(self, lu):
14424 lu.needed_locks = {}
14426 # The following variables interact with _QueryBase._GetNames
14428 self.wanted = _GetWantedNodes(lu, self.names)
14430 self.wanted = locking.ALL_SET
14432 self.do_locking = self.use_locking
14434 if self.do_locking:
14435 lu.share_locks = _ShareAll()
14436 lu.needed_locks = {
14437 locking.LEVEL_NODE: self.wanted,
14441 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14443 def DeclareLocks(self, lu, level):
14446 def _GetQueryData(self, lu):
14447 """Computes the list of exports per node and their attributes.
14450 # Locking is not used
14452 assert not (compat.any(lu.glm.is_owned(level)
14453 for level in locking.LEVELS
14454 if level != locking.LEVEL_CLUSTER) or
14455 self.do_locking or self.use_locking)
14457 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14461 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14463 result.append((node, None))
14465 result.extend((node, expname) for expname in nres.payload)
14470 class LUBackupPrepare(NoHooksLU):
14471 """Prepares an instance for an export and returns useful information.
14476 def ExpandNames(self):
14477 self._ExpandAndLockInstance()
14479 def CheckPrereq(self):
14480 """Check prerequisites.
14483 instance_name = self.op.instance_name
14485 self.instance = self.cfg.GetInstanceInfo(instance_name)
14486 assert self.instance is not None, \
14487 "Cannot retrieve locked instance %s" % self.op.instance_name
14488 _CheckNodeOnline(self, self.instance.primary_node)
14490 self._cds = _GetClusterDomainSecret()
14492 def Exec(self, feedback_fn):
14493 """Prepares an instance for an export.
14496 instance = self.instance
14498 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14499 salt = utils.GenerateSecret(8)
14501 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14502 result = self.rpc.call_x509_cert_create(instance.primary_node,
14503 constants.RIE_CERT_VALIDITY)
14504 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14506 (name, cert_pem) = result.payload
14508 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14512 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14513 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14515 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
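# Descriptive note (added): the dictionary assembled above is what the remote
# import side needs in order to verify this export - the cluster domain
# secret handshake, the HMAC-signed X509 key name and the signed CA
# certificate.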
14521 class LUBackupExport(LogicalUnit):
14522 """Export an instance to an image in the cluster.
14525 HPATH = "instance-export"
14526 HTYPE = constants.HTYPE_INSTANCE
14529 def CheckArguments(self):
14530 """Check the arguments.
14533 self.x509_key_name = self.op.x509_key_name
14534 self.dest_x509_ca_pem = self.op.destination_x509_ca
14536 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14537 if not self.x509_key_name:
14538 raise errors.OpPrereqError("Missing X509 key name for encryption",
14539 errors.ECODE_INVAL)
14541 if not self.dest_x509_ca_pem:
14542 raise errors.OpPrereqError("Missing destination X509 CA",
14543 errors.ECODE_INVAL)
14545 def ExpandNames(self):
14546 self._ExpandAndLockInstance()
14548 # Lock all nodes for local exports
14549 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14550 # FIXME: lock only instance primary and destination node
14552 # Sad but true, for now we have to lock all nodes, as we don't know where
14553 # the previous export might be, and in this LU we search for it and
14554 # remove it from its current node. In the future we could fix this by:
14555 # - making a tasklet to search (share-lock all), then create the new
14556 # export, then another tasklet to remove the old one afterwards
14557 # - removing the removal operation altogether
14558 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
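# Note (added): locking.ALL_SET acquires every node lock in the cluster, so a
# local export currently serializes against any other operation that needs
# node locks; the FIXME above is about narrowing this to just the primary and
# destination node.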
14560 # Allocations should be stopped while this LU runs with node locks, but
14561 # it doesn't have to be exclusive
14562 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14563 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14565 def DeclareLocks(self, level):
14566 """Last minute lock declaration."""
14567 # All nodes are locked anyway, so nothing to do here.
14569 def BuildHooksEnv(self):
14570 """Build hooks env.
14572 This will run on the master, primary node and target node.
14576 "EXPORT_MODE": self.op.mode,
14577 "EXPORT_NODE": self.op.target_node,
14578 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14579 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14580 # TODO: Generic function for boolean env variables
14581 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14584 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14588 def BuildHooksNodes(self):
14589 """Build hooks nodes.
14592 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14594 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14595 nl.append(self.op.target_node)
14599 def CheckPrereq(self):
14600 """Check prerequisites.
14602 This checks that the instance and node names are valid.
14605 instance_name = self.op.instance_name
14607 self.instance = self.cfg.GetInstanceInfo(instance_name)
14608 assert self.instance is not None, \
14609 "Cannot retrieve locked instance %s" % self.op.instance_name
14610 _CheckNodeOnline(self, self.instance.primary_node)
14612 if (self.op.remove_instance and
14613 self.instance.admin_state == constants.ADMINST_UP and
14614 not self.op.shutdown):
14615 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14616 " down first", errors.ECODE_STATE)
14618 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14619 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14620 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14621 assert self.dst_node is not None
14623 _CheckNodeOnline(self, self.dst_node.name)
14624 _CheckNodeNotDrained(self, self.dst_node.name)
14627 self.dest_disk_info = None
14628 self.dest_x509_ca = None
14630 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14631 self.dst_node = None
14633 if len(self.op.target_node) != len(self.instance.disks):
14634 raise errors.OpPrereqError(("Received destination information for %s"
14635 " disks, but instance %s has %s disks") %
14636 (len(self.op.target_node), instance_name,
14637 len(self.instance.disks)),
14638 errors.ECODE_INVAL)
14640 cds = _GetClusterDomainSecret()
14642 # Check X509 key name
14644 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14645 except (TypeError, ValueError), err:
14646 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14647 errors.ECODE_INVAL)
14649 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14650 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14651 errors.ECODE_INVAL)
14653 # Load and verify CA
14655 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14656 except OpenSSL.crypto.Error, err:
14657 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14658 (err, ), errors.ECODE_INVAL)
14660 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14661 if errcode is not None:
14662 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14663 (msg, ), errors.ECODE_INVAL)
14665 self.dest_x509_ca = cert
14667 # Verify target information
14669 for idx, disk_data in enumerate(self.op.target_node):
14671 (host, port, magic) = \
14672 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14673 except errors.GenericError, err:
14674 raise errors.OpPrereqError("Target info for disk %s: %s" %
14675 (idx, err), errors.ECODE_INVAL)
14677 disk_info.append((host, port, magic))
14679 assert len(disk_info) == len(self.op.target_node)
14680 self.dest_disk_info = disk_info
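# Descriptive note (added): dest_disk_info is a list of (host, port, magic)
# tuples, one per instance disk and in disk-index order, as verified by
# CheckRemoteExportDiskInfo above; Exec() later passes it to the remote
# export helper.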
14683 raise errors.ProgrammerError("Unhandled export mode %r" %
14686 # instance disk type verification
14687 # TODO: Implement export support for file-based disks
14688 for disk in self.instance.disks:
14689 if disk.dev_type == constants.LD_FILE:
14690 raise errors.OpPrereqError("Export not supported for instances with"
14691 " file-based disks", errors.ECODE_INVAL)
14693 def _CleanupExports(self, feedback_fn):
14694 """Removes exports of current instance from all other nodes.
14696 If an instance in a cluster with nodes A..D was exported to node C, its
14697 exports will be removed from the nodes A, B and D.
14700 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14702 nodelist = self.cfg.GetNodeList()
14703 nodelist.remove(self.dst_node.name)
14705 # on one-node clusters nodelist will be empty after the removal
14706 # if we proceeded, the backup would be removed, because OpBackupQuery
14707 # substitutes an empty list with the full cluster node list.
14708 iname = self.instance.name
14710 feedback_fn("Removing old exports for instance %s" % iname)
14711 exportlist = self.rpc.call_export_list(nodelist)
14712 for node in exportlist:
14713 if exportlist[node].fail_msg:
14715 if iname in exportlist[node].payload:
14716 msg = self.rpc.call_export_remove(node, iname).fail_msg
14718 self.LogWarning("Could not remove older export for instance %s"
14719 " on node %s: %s", iname, node, msg)
14721 def Exec(self, feedback_fn):
14722 """Export an instance to an image in the cluster.
14725 assert self.op.mode in constants.EXPORT_MODES
14727 instance = self.instance
14728 src_node = instance.primary_node
14730 if self.op.shutdown:
14731 # shutdown the instance, but not the disks
14732 feedback_fn("Shutting down instance %s" % instance.name)
14733 result = self.rpc.call_instance_shutdown(src_node, instance,
14734 self.op.shutdown_timeout)
14735 # TODO: Maybe ignore failures if ignore_remove_failures is set
14736 result.Raise("Could not shut down instance %s on"
14737 " node %s" % (instance.name, src_node))
14739 # set the disks ID correctly since call_instance_start needs the
14740 # correct drbd minor to create the symlinks
14741 for disk in instance.disks:
14742 self.cfg.SetDiskID(disk, src_node)
14744 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14747 # Activate the instance disks if we're exporting a stopped instance
14748 feedback_fn("Activating disks for %s" % instance.name)
14749 _StartInstanceDisks(self, instance, None)
14752 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14755 helper.CreateSnapshots()
14757 if (self.op.shutdown and
14758 instance.admin_state == constants.ADMINST_UP and
14759 not self.op.remove_instance):
14760 assert not activate_disks
14761 feedback_fn("Starting instance %s" % instance.name)
14762 result = self.rpc.call_instance_start(src_node,
14763 (instance, None, None), False)
14764 msg = result.fail_msg
14766 feedback_fn("Failed to start instance: %s" % msg)
14767 _ShutdownInstanceDisks(self, instance)
14768 raise errors.OpExecError("Could not start instance: %s" % msg)
14770 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14771 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14772 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14773 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14774 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14776 (key_name, _, _) = self.x509_key_name
14779 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14782 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14783 key_name, dest_ca_pem,
14788 # Check for backwards compatibility
14789 assert len(dresults) == len(instance.disks)
14790 assert compat.all(isinstance(i, bool) for i in dresults), \
14791 "Not all results are boolean: %r" % dresults
14795 feedback_fn("Deactivating disks for %s" % instance.name)
14796 _ShutdownInstanceDisks(self, instance)
14798 if not (compat.all(dresults) and fin_resu):
14801 failures.append("export finalization")
14802 if not compat.all(dresults):
14803 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14805 failures.append("disk export: disk(s) %s" % fdsk)
14807 raise errors.OpExecError("Export failed, errors in %s" %
14808 utils.CommaJoin(failures))
14810 # At this point, the export was successful, we can cleanup/finish
14812 # Remove instance if requested
14813 if self.op.remove_instance:
14814 feedback_fn("Removing instance %s" % instance.name)
14815 _RemoveInstance(self, feedback_fn, instance,
14816 self.op.ignore_remove_failures)
14818 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14819 self._CleanupExports(feedback_fn)
14821 return fin_resu, dresults
14824 class LUBackupRemove(NoHooksLU):
14825 """Remove exports related to the named instance.
14830 def ExpandNames(self):
14831 self.needed_locks = {
14832 # We need all nodes to be locked in order for RemoveExport to work, but
14833 # we don't need to lock the instance itself, as nothing will happen to it
14834 # (and we can remove exports also for a removed instance)
14835 locking.LEVEL_NODE: locking.ALL_SET,
14837 # Removing backups is quick, so blocking allocations is justified
14838 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14841 # Allocations should be stopped while this LU runs with node locks, but it
14842 # doesn't have to be exclusive
14843 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14845 def Exec(self, feedback_fn):
14846 """Remove any export.
14849 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14850 # If the instance was not found we'll try with the name that was passed in.
14851 # This will only work if it was an FQDN, though.
14853 if not instance_name:
14855 instance_name = self.op.instance_name
14857 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14858 exportlist = self.rpc.call_export_list(locked_nodes)
14860 for node in exportlist:
14861 msg = exportlist[node].fail_msg
14863 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14865 if instance_name in exportlist[node].payload:
14867 result = self.rpc.call_export_remove(node, instance_name)
14868 msg = result.fail_msg
14870 logging.error("Could not remove export for instance %s"
14871 " on node %s: %s", instance_name, node, msg)
14873 if fqdn_warn and not found:
14874 feedback_fn("Export not found. If trying to remove an export belonging"
14875 " to a deleted instance please use its Fully Qualified"
14879 class LUGroupAdd(LogicalUnit):
14880 """Logical unit for creating node groups.
14883 HPATH = "group-add"
14884 HTYPE = constants.HTYPE_GROUP
14887 def ExpandNames(self):
14888 # We need the new group's UUID here so that we can create and acquire the
14889 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14890 # that it should not check whether the UUID exists in the configuration.
14891 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14892 self.needed_locks = {}
14893 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
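# Note (added; an interpretation of the add_locks/remove_locks mechanism, not
# original commentary): registering the freshly generated UUID in add_locks
# makes the processor create and acquire a node-group lock for a group that
# does not yet exist in the configuration; Exec() below then deletes the
# matching remove_locks entry so the new lock is kept once the group has
# actually been added.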
14895 def CheckPrereq(self):
14896 """Check prerequisites.
14898 This checks that the given group name is not an existing node group
14903 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14904 except errors.OpPrereqError:
14907 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14908 " node group (UUID: %s)" %
14909 (self.op.group_name, existing_uuid),
14910 errors.ECODE_EXISTS)
14912 if self.op.ndparams:
14913 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14915 if self.op.hv_state:
14916 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14918 self.new_hv_state = None
14920 if self.op.disk_state:
14921 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14923 self.new_disk_state = None
14925 if self.op.diskparams:
14926 for templ in constants.DISK_TEMPLATES:
14927 if templ in self.op.diskparams:
14928 utils.ForceDictType(self.op.diskparams[templ],
14929 constants.DISK_DT_TYPES)
14930 self.new_diskparams = self.op.diskparams
14932 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14933 except errors.OpPrereqError, err:
14934 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14935 errors.ECODE_INVAL)
14937 self.new_diskparams = {}
14939 if self.op.ipolicy:
14940 cluster = self.cfg.GetClusterInfo()
14941 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14943 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14944 except errors.ConfigurationError, err:
14945 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14946 errors.ECODE_INVAL)
14948 def BuildHooksEnv(self):
14949 """Build hooks env.
14953 "GROUP_NAME": self.op.group_name,
14956 def BuildHooksNodes(self):
14957 """Build hooks nodes.
14960 mn = self.cfg.GetMasterNode()
14961 return ([mn], [mn])
14963 def Exec(self, feedback_fn):
14964 """Add the node group to the cluster.
14967 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14968 uuid=self.group_uuid,
14969 alloc_policy=self.op.alloc_policy,
14970 ndparams=self.op.ndparams,
14971 diskparams=self.new_diskparams,
14972 ipolicy=self.op.ipolicy,
14973 hv_state_static=self.new_hv_state,
14974 disk_state_static=self.new_disk_state)
14976 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14977 del self.remove_locks[locking.LEVEL_NODEGROUP]
14980 class LUGroupAssignNodes(NoHooksLU):
14981 """Logical unit for assigning nodes to groups.
14986 def ExpandNames(self):
14987 # These raise errors.OpPrereqError on their own:
14988 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14989 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14991 # We want to lock all the affected nodes and groups. We have readily
14992 # available the list of nodes, and the *destination* group. To gather the
14993 # list of "source" groups, we need to fetch node information later on.
14994 self.needed_locks = {
14995 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14996 locking.LEVEL_NODE: self.op.nodes,
14999 def DeclareLocks(self, level):
15000 if level == locking.LEVEL_NODEGROUP:
15001 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15003 # Try to get all affected nodes' groups without having the group or node
15004 # lock yet. Needs verification later in the code flow.
15005 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15007 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15009 def CheckPrereq(self):
15010 """Check prerequisites.
15013 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15015 frozenset(self.op.nodes))
15017 expected_locks = (set([self.group_uuid]) |
15018 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15019 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15020 if actual_locks != expected_locks:
15021 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15022 " current groups are '%s', used to be '%s'" %
15023 (utils.CommaJoin(expected_locks),
15024 utils.CommaJoin(actual_locks)))
15026 self.node_data = self.cfg.GetAllNodesInfo()
15027 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15028 instance_data = self.cfg.GetAllInstancesInfo()
15030 if self.group is None:
15031 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15032 (self.op.group_name, self.group_uuid))
15034 (new_splits, previous_splits) = \
15035 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15036 for node in self.op.nodes],
15037 self.node_data, instance_data)
15040 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15042 if not self.op.force:
15043 raise errors.OpExecError("The following instances get split by this"
15044 " change and --force was not given: %s" %
15047 self.LogWarning("This operation will split the following instances: %s",
15050 if previous_splits:
15051 self.LogWarning("In addition, these already-split instances continue"
15052 " to be split across groups: %s",
15053 utils.CommaJoin(utils.NiceSort(previous_splits)))
15055 def Exec(self, feedback_fn):
15056 """Assign nodes to a new group.
15059 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15061 self.cfg.AssignGroupNodes(mods)
15064 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15065 """Check for split instances after a node assignment.
15067 This method considers a series of node assignments as an atomic operation,
15068 and returns information about split instances after applying the set of
15071 In particular, it returns information about newly split instances, and
15072 instances that were already split, and remain so after the change.
15074 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15077 @type changes: list of (node_name, new_group_uuid) pairs.
15078 @param changes: list of node assignments to consider.
15079 @param node_data: a dict with data for all nodes
15080 @param instance_data: a dict with all instances to consider
15081 @rtype: a two-tuple
15082 @return: a list of instances that were previously okay and become split as a
15083 consequence of this change, and a list of instances that were previously
15084 split and that this change does not fix.
15087 changed_nodes = dict((node, group) for node, group in changes
15088 if node_data[node].group != group)
15090 all_split_instances = set()
15091 previously_split_instances = set()
15093 def InstanceNodes(instance):
15094 return [instance.primary_node] + list(instance.secondary_nodes)
15096 for inst in instance_data.values():
15097 if inst.disk_template not in constants.DTS_INT_MIRROR:
15100 instance_nodes = InstanceNodes(inst)
15102 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15103 previously_split_instances.add(inst.name)
15105 if len(set(changed_nodes.get(node, node_data[node].group)
15106 for node in instance_nodes)) > 1:
15107 all_split_instances.add(inst.name)
15109 return (list(all_split_instances - previously_split_instances),
15110 list(previously_split_instances & all_split_instances))
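# Illustrative example (added; node, group and instance names are
# hypothetical): say nodes "n1" and "n2" are in group "g1", node "n3" is in
# group "g2", DRBD instance "inst1" lives on n1/n2 and DRBD instance "inst2"
# lives on n1/n3 (so it is already split).  Then
#   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
# would return (["inst1"], ["inst2"]): moving n2 into g2 newly splits inst1,
# while inst2 remains split after the change.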
15113 class _GroupQuery(_QueryBase):
15114 FIELDS = query.GROUP_FIELDS
15116 def ExpandNames(self, lu):
15117 lu.needed_locks = {}
15119 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15120 self._cluster = lu.cfg.GetClusterInfo()
15121 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15124 self.wanted = [name_to_uuid[name]
15125 for name in utils.NiceSort(name_to_uuid.keys())]
15127 # Accept the given names as either group names or UUIDs.
15130 all_uuid = frozenset(self._all_groups.keys())
15132 for name in self.names:
15133 if name in all_uuid:
15134 self.wanted.append(name)
15135 elif name in name_to_uuid:
15136 self.wanted.append(name_to_uuid[name])
15138 missing.append(name)
15141 raise errors.OpPrereqError("Some groups do not exist: %s" %
15142 utils.CommaJoin(missing),
15143 errors.ECODE_NOENT)
15145 def DeclareLocks(self, lu, level):
15148 def _GetQueryData(self, lu):
15149 """Computes the list of node groups and their attributes.
15152 do_nodes = query.GQ_NODE in self.requested_data
15153 do_instances = query.GQ_INST in self.requested_data
15155 group_to_nodes = None
15156 group_to_instances = None
15158 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15159 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15160 # latter GetAllInstancesInfo() is not enough, for we have to go through
15161 # instance->node. Hence, we will need to process nodes even if we only need
15162 # instance information.
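# Illustrative shape (added): after the loops below, group_to_nodes maps each
# requested group UUID to a list of its node names, and group_to_instances
# maps each group UUID to the names of instances whose primary node is in
# that group, e.g. {"uuid-1": ["inst1", "inst2"]}.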
15163 if do_nodes or do_instances:
15164 all_nodes = lu.cfg.GetAllNodesInfo()
15165 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15168 for node in all_nodes.values():
15169 if node.group in group_to_nodes:
15170 group_to_nodes[node.group].append(node.name)
15171 node_to_group[node.name] = node.group
15174 all_instances = lu.cfg.GetAllInstancesInfo()
15175 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15177 for instance in all_instances.values():
15178 node = instance.primary_node
15179 if node in node_to_group:
15180 group_to_instances[node_to_group[node]].append(instance.name)
15183 # Do not pass on node information if it was not requested.
15184 group_to_nodes = None
15186 return query.GroupQueryData(self._cluster,
15187 [self._all_groups[uuid]
15188 for uuid in self.wanted],
15189 group_to_nodes, group_to_instances,
15190 query.GQ_DISKPARAMS in self.requested_data)
15193 class LUGroupQuery(NoHooksLU):
15194 """Logical unit for querying node groups.
15199 def CheckArguments(self):
15200 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15201 self.op.output_fields, False)
15203 def ExpandNames(self):
15204 self.gq.ExpandNames(self)
15206 def DeclareLocks(self, level):
15207 self.gq.DeclareLocks(self, level)
15209 def Exec(self, feedback_fn):
15210 return self.gq.OldStyleQuery(self)
15213 class LUGroupSetParams(LogicalUnit):
15214 """Modifies the parameters of a node group.
15217 HPATH = "group-modify"
15218 HTYPE = constants.HTYPE_GROUP
15221 def CheckArguments(self):
15224 self.op.diskparams,
15225 self.op.alloc_policy,
15227 self.op.disk_state,
15231 if all_changes.count(None) == len(all_changes):
15232 raise errors.OpPrereqError("Please pass at least one modification",
15233 errors.ECODE_INVAL)
15235 def ExpandNames(self):
15236 # This raises errors.OpPrereqError on its own:
15237 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15239 self.needed_locks = {
15240 locking.LEVEL_INSTANCE: [],
15241 locking.LEVEL_NODEGROUP: [self.group_uuid],
15244 self.share_locks[locking.LEVEL_INSTANCE] = 1
15246 def DeclareLocks(self, level):
15247 if level == locking.LEVEL_INSTANCE:
15248 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15250 # Lock instances optimistically, needs verification once group lock has
15252 self.needed_locks[locking.LEVEL_INSTANCE] = \
15253 self.cfg.GetNodeGroupInstances(self.group_uuid)
15256 def _UpdateAndVerifyDiskParams(old, new):
15257 """Updates and verifies disk parameters.
15260 new_params = _GetUpdatedParams(old, new)
15261 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
return new_params
15264 def CheckPrereq(self):
15265 """Check prerequisites.
15268 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15270 # Check if locked instances are still correct
15271 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15273 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15274 cluster = self.cfg.GetClusterInfo()
15276 if self.group is None:
15277 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15278 (self.op.group_name, self.group_uuid))
15280 if self.op.ndparams:
15281 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15282 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15283 self.new_ndparams = new_ndparams
15285 if self.op.diskparams:
15286 diskparams = self.group.diskparams
15287 uavdp = self._UpdateAndVerifyDiskParams
15288 # For each disk template subdict, update and verify the values
15289 new_diskparams = dict((dt,
15290 uavdp(diskparams.get(dt, {}),
15291 self.op.diskparams[dt]))
15292 for dt in constants.DISK_TEMPLATES
15293 if dt in self.op.diskparams)
15294 # Now that we have all subdicts of diskparams ready, let's merge the actual
15295 # dict with all updated subdicts
15296 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
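# Illustrative example (added; parameter values are hypothetical): if the
# group currently has diskparams {"drbd": {"metavg": "xenvg"}} and the opcode
# passes {"drbd": {"resync-rate": 61440}}, new_diskparams contains the merged
# and type-checked "drbd" subdict and FillDict() above lays it over the
# group's existing per-template dicts.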
15298 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15299 except errors.OpPrereqError, err:
15300 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15301 errors.ECODE_INVAL)
15303 if self.op.hv_state:
15304 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15305 self.group.hv_state_static)
15307 if self.op.disk_state:
15308 self.new_disk_state = \
15309 _MergeAndVerifyDiskState(self.op.disk_state,
15310 self.group.disk_state_static)
15312 if self.op.ipolicy:
15313 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15317 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15318 inst_filter = lambda inst: inst.name in owned_instances
15319 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15320 gmi = ganeti.masterd.instance
15322 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15324 new_ipolicy, instances)
15327 self.LogWarning("After the ipolicy change the following instances"
15328 " violate it: %s",
15329 utils.CommaJoin(violations))
15331 def BuildHooksEnv(self):
15332 """Build hooks env.
15336 "GROUP_NAME": self.op.group_name,
15337 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15340 def BuildHooksNodes(self):
15341 """Build hooks nodes.
15344 mn = self.cfg.GetMasterNode()
15345 return ([mn], [mn])
15347 def Exec(self, feedback_fn):
15348 """Modifies the node group.
15353 if self.op.ndparams:
15354 self.group.ndparams = self.new_ndparams
15355 result.append(("ndparams", str(self.group.ndparams)))
15357 if self.op.diskparams:
15358 self.group.diskparams = self.new_diskparams
15359 result.append(("diskparams", str(self.group.diskparams)))
15361 if self.op.alloc_policy:
15362 self.group.alloc_policy = self.op.alloc_policy
15364 if self.op.hv_state:
15365 self.group.hv_state_static = self.new_hv_state
15367 if self.op.disk_state:
15368 self.group.disk_state_static = self.new_disk_state
15370 if self.op.ipolicy:
15371 self.group.ipolicy = self.new_ipolicy
15373 self.cfg.Update(self.group, feedback_fn)
15377 class LUGroupRemove(LogicalUnit):
15378 HPATH = "group-remove"
15379 HTYPE = constants.HTYPE_GROUP
15382 def ExpandNames(self):
15383 # This will raise errors.OpPrereqError on its own:
15384 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15385 self.needed_locks = {
15386 locking.LEVEL_NODEGROUP: [self.group_uuid],
15389 def CheckPrereq(self):
15390 """Check prerequisites.
15392 This checks that the given group name exists as a node group, that it is
15393 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15397 # Verify that the group is empty.
15398 group_nodes = [node.name
15399 for node in self.cfg.GetAllNodesInfo().values()
15400 if node.group == self.group_uuid]
15403 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15405 (self.op.group_name,
15406 utils.CommaJoin(utils.NiceSort(group_nodes))),
15407 errors.ECODE_STATE)
15409 # Verify the cluster would not be left group-less.
15410 if len(self.cfg.GetNodeGroupList()) == 1:
15411 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15412 " removed" % self.op.group_name,
15413 errors.ECODE_STATE)
15415 def BuildHooksEnv(self):
15416 """Build hooks env.
15420 "GROUP_NAME": self.op.group_name,
15423 def BuildHooksNodes(self):
15424 """Build hooks nodes.
15427 mn = self.cfg.GetMasterNode()
15428 return ([mn], [mn])
15430 def Exec(self, feedback_fn):
15431 """Remove the node group.
15435 self.cfg.RemoveNodeGroup(self.group_uuid)
15436 except errors.ConfigurationError:
15437 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15438 (self.op.group_name, self.group_uuid))
15440 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15443 class LUGroupRename(LogicalUnit):
15444 HPATH = "group-rename"
15445 HTYPE = constants.HTYPE_GROUP
15448 def ExpandNames(self):
15449 # This raises errors.OpPrereqError on its own:
15450 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15452 self.needed_locks = {
15453 locking.LEVEL_NODEGROUP: [self.group_uuid],
15456 def CheckPrereq(self):
15457 """Check prerequisites.
15459 Ensures requested new name is not yet used.
15463 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15464 except errors.OpPrereqError:
15467 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15468 " node group (UUID: %s)" %
15469 (self.op.new_name, new_name_uuid),
15470 errors.ECODE_EXISTS)
15472 def BuildHooksEnv(self):
15473 """Build hooks env.
15477 "OLD_NAME": self.op.group_name,
15478 "NEW_NAME": self.op.new_name,
15481 def BuildHooksNodes(self):
15482 """Build hooks nodes.
15485 mn = self.cfg.GetMasterNode()
15487 all_nodes = self.cfg.GetAllNodesInfo()
15488 all_nodes.pop(mn, None)
15491 run_nodes.extend(node.name for node in all_nodes.values()
15492 if node.group == self.group_uuid)
15494 return (run_nodes, run_nodes)
15496 def Exec(self, feedback_fn):
15497 """Rename the node group.
15500 group = self.cfg.GetNodeGroup(self.group_uuid)
15503 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15504 (self.op.group_name, self.group_uuid))
15506 group.name = self.op.new_name
15507 self.cfg.Update(group, feedback_fn)
15509 return self.op.new_name
15512 class LUGroupEvacuate(LogicalUnit):
15513 HPATH = "group-evacuate"
15514 HTYPE = constants.HTYPE_GROUP
15517 def ExpandNames(self):
15518 # This raises errors.OpPrereqError on its own:
15519 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15521 if self.op.target_groups:
15522 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15523 self.op.target_groups)
15525 self.req_target_uuids = []
15527 if self.group_uuid in self.req_target_uuids:
15528 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
15529 " as a target group (targets are %s)" %
15531 utils.CommaJoin(self.req_target_uuids)),
15532 errors.ECODE_INVAL)
15534 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15536 self.share_locks = _ShareAll()
15537 self.needed_locks = {
15538 locking.LEVEL_INSTANCE: [],
15539 locking.LEVEL_NODEGROUP: [],
15540 locking.LEVEL_NODE: [],
15543 def DeclareLocks(self, level):
15544 if level == locking.LEVEL_INSTANCE:
15545 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15547 # Lock instances optimistically, needs verification once node and group
15548 # locks have been acquired
15549 self.needed_locks[locking.LEVEL_INSTANCE] = \
15550 self.cfg.GetNodeGroupInstances(self.group_uuid)
15552 elif level == locking.LEVEL_NODEGROUP:
15553 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15555 if self.req_target_uuids:
15556 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15558 # Lock all groups used by instances optimistically; this requires going
15559 # via the node before it's locked, requiring verification later on
15560 lock_groups.update(group_uuid
15561 for instance_name in
15562 self.owned_locks(locking.LEVEL_INSTANCE)
15564 self.cfg.GetInstanceNodeGroups(instance_name))
15566 # No target groups, need to lock all of them
15567 lock_groups = locking.ALL_SET
15569 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15571 elif level == locking.LEVEL_NODE:
15572 # This will only lock the nodes in the group to be evacuated which
15573 # contain actual instances
15574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15575 self._LockInstancesNodes()
15577 # Lock all nodes in group to be evacuated and target groups
15578 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15579 assert self.group_uuid in owned_groups
15580 member_nodes = [node_name
15581 for group in owned_groups
15582 for node_name in self.cfg.GetNodeGroup(group).members]
15583 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15585 def CheckPrereq(self):
15586 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15587 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15588 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15590 assert owned_groups.issuperset(self.req_target_uuids)
15591 assert self.group_uuid in owned_groups
15593 # Check if locked instances are still correct
15594 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15596 # Get instance information
15597 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15599 # Check if node groups for locked instances are still correct
15600 _CheckInstancesNodeGroups(self.cfg, self.instances,
15601 owned_groups, owned_nodes, self.group_uuid)
15603 if self.req_target_uuids:
15604 # User requested specific target groups
15605 self.target_uuids = self.req_target_uuids
15607 # All groups except the one to be evacuated are potential targets
15608 self.target_uuids = [group_uuid for group_uuid in owned_groups
15609 if group_uuid != self.group_uuid]
15611 if not self.target_uuids:
15612 raise errors.OpPrereqError("There are no possible target groups",
15613 errors.ECODE_INVAL)
15615 def BuildHooksEnv(self):
15616 """Build hooks env.
15620 "GROUP_NAME": self.op.group_name,
15621 "TARGET_GROUPS": " ".join(self.target_uuids),
15624 def BuildHooksNodes(self):
15625 """Build hooks nodes.
15628 mn = self.cfg.GetMasterNode()
15630 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15632 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15634 return (run_nodes, run_nodes)
15636 def Exec(self, feedback_fn):
15637 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15639 assert self.group_uuid not in self.target_uuids
15641 req = iallocator.IAReqGroupChange(instances=instances,
15642 target_groups=self.target_uuids)
15643 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15645 ial.Run(self.op.iallocator)
15647 if not ial.success:
15648 raise errors.OpPrereqError("Can't compute group evacuation using"
15649 " iallocator '%s': %s" %
15650 (self.op.iallocator, ial.info),
15651 errors.ECODE_NORES)
15653 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15655 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15656 len(jobs), self.op.group_name)
15658 return ResultWithJobs(jobs)
15661 class TagsLU(NoHooksLU): # pylint: disable=W0223
15662 """Generic tags LU.
15664 This is an abstract class which is the parent of all the other tags LUs.
15667 def ExpandNames(self):
15668 self.group_uuid = None
15669 self.needed_locks = {}
15671 if self.op.kind == constants.TAG_NODE:
15672 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15673 lock_level = locking.LEVEL_NODE
15674 lock_name = self.op.name
15675 elif self.op.kind == constants.TAG_INSTANCE:
15676 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15677 lock_level = locking.LEVEL_INSTANCE
15678 lock_name = self.op.name
15679 elif self.op.kind == constants.TAG_NODEGROUP:
15680 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15681 lock_level = locking.LEVEL_NODEGROUP
15682 lock_name = self.group_uuid
15683 elif self.op.kind == constants.TAG_NETWORK:
15684 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15685 lock_level = locking.LEVEL_NETWORK
15686 lock_name = self.network_uuid
15691 if lock_level and getattr(self.op, "use_locking", True):
15692 self.needed_locks[lock_level] = lock_name
15694 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15695 # not possible to acquire the BGL based on opcode parameters)
15697 def CheckPrereq(self):
15698 """Check prerequisites.
15701 if self.op.kind == constants.TAG_CLUSTER:
15702 self.target = self.cfg.GetClusterInfo()
15703 elif self.op.kind == constants.TAG_NODE:
15704 self.target = self.cfg.GetNodeInfo(self.op.name)
15705 elif self.op.kind == constants.TAG_INSTANCE:
15706 self.target = self.cfg.GetInstanceInfo(self.op.name)
15707 elif self.op.kind == constants.TAG_NODEGROUP:
15708 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15709 elif self.op.kind == constants.TAG_NETWORK:
15710 self.target = self.cfg.GetNetwork(self.network_uuid)
15712 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15713 str(self.op.kind), errors.ECODE_INVAL)
15716 class LUTagsGet(TagsLU):
15717 """Returns the tags of a given object.
15722 def ExpandNames(self):
15723 TagsLU.ExpandNames(self)
15725 # Share locks as this is only a read operation
15726 self.share_locks = _ShareAll()
15728 def Exec(self, feedback_fn):
15729 """Returns the tag list.
15732 return list(self.target.GetTags())
15735 class LUTagsSearch(NoHooksLU):
15736 """Searches the tags for a given pattern.
15741 def ExpandNames(self):
15742 self.needed_locks = {}
15744 def CheckPrereq(self):
15745 """Check prerequisites.
15747 This checks the pattern passed for validity by compiling it.
15751 self.re = re.compile(self.op.pattern)
15752 except re.error, err:
15753 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15754 (self.op.pattern, err), errors.ECODE_INVAL)
15756 def Exec(self, feedback_fn):
15757 """Returns the list of (path, tag) pairs matching the pattern.
15761 tgts = [("/cluster", cfg.GetClusterInfo())]
15762 ilist = cfg.GetAllInstancesInfo().values()
15763 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15764 nlist = cfg.GetAllNodesInfo().values()
15765 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15766 tgts.extend(("/nodegroup/%s" % n.name, n)
15767 for n in cfg.GetAllNodeGroupsInfo().values())
15769 for path, target in tgts:
15770 for tag in target.GetTags():
15771 if self.re.search(tag):
15772 results.append((path, tag))
15776 class LUTagsSet(TagsLU):
15777 """Sets a tag on a given object.
15782 def CheckPrereq(self):
15783 """Check prerequisites.
15785 This checks the type and length of the tag name and value.
15788 TagsLU.CheckPrereq(self)
15789 for tag in self.op.tags:
15790 objects.TaggableObject.ValidateTag(tag)
15792 def Exec(self, feedback_fn):
15797 for tag in self.op.tags:
15798 self.target.AddTag(tag)
15799 except errors.TagError, err:
15800 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15801 self.cfg.Update(self.target, feedback_fn)
15804 class LUTagsDel(TagsLU):
15805 """Delete a list of tags from a given object.
15810 def CheckPrereq(self):
15811 """Check prerequisites.
15813 This checks that we have the given tag.
15816 TagsLU.CheckPrereq(self)
15817 for tag in self.op.tags:
15818 objects.TaggableObject.ValidateTag(tag)
15819 del_tags = frozenset(self.op.tags)
15820 cur_tags = self.target.GetTags()
15822 diff_tags = del_tags - cur_tags
15824 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15825 raise errors.OpPrereqError("Tag(s) %s not found" %
15826 (utils.CommaJoin(diff_names), ),
15827 errors.ECODE_NOENT)
15829 def Exec(self, feedback_fn):
15830 """Remove the tag from the object.
15833 for tag in self.op.tags:
15834 self.target.RemoveTag(tag)
15835 self.cfg.Update(self.target, feedback_fn)
15838 class LUTestDelay(NoHooksLU):
15839 """Sleep for a specified amount of time.
15841 This LU sleeps on the master and/or nodes for a specified amount of
15847 def ExpandNames(self):
15848 """Expand names and set required locks.
15850 This expands the node list, if any.
15853 self.needed_locks = {}
15854 if self.op.on_nodes:
15855 # _GetWantedNodes can be used here, but is not always appropriate to use
15856 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15857 # more information.
15858 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15859 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15861 def _TestDelay(self):
15862 """Do the actual sleep.
15865 if self.op.on_master:
15866 if not utils.TestDelay(self.op.duration):
15867 raise errors.OpExecError("Error during master delay test")
15868 if self.op.on_nodes:
15869 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15870 for node, node_result in result.items():
15871 node_result.Raise("Failure during rpc call to node %s" % node)
15873 def Exec(self, feedback_fn):
15874 """Execute the test delay opcode, with the wanted repetitions.
15877 if self.op.repeat == 0:
15880 top_value = self.op.repeat - 1
15881 for i in range(self.op.repeat):
15882 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15886 class LURestrictedCommand(NoHooksLU):
15887 """Logical unit for executing restricted commands.
15892 def ExpandNames(self):
15894 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15896 self.needed_locks = {
15897 locking.LEVEL_NODE: self.op.nodes,
15899 self.share_locks = {
15900 locking.LEVEL_NODE: not self.op.use_locking,
15903 def CheckPrereq(self):
15904 """Check prerequisites.
15908 def Exec(self, feedback_fn):
15909 """Execute restricted command and return output.
15912 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15914 # Check if correct locks are held
15915 assert set(self.op.nodes).issubset(owned_nodes)
15917 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15921 for node_name in self.op.nodes:
15922 nres = rpcres[node_name]
15924 msg = ("Command '%s' on node '%s' failed: %s" %
15925 (self.op.command, node_name, nres.fail_msg))
15926 result.append((False, msg))
15928 result.append((True, nres.payload))
15933 class LUTestJqueue(NoHooksLU):
15934 """Utility LU to test some aspects of the job queue.
15939 # Must be lower than default timeout for WaitForJobChange to see whether it
15940 # notices changed jobs
15941 _CLIENT_CONNECT_TIMEOUT = 20.0
15942 _CLIENT_CONFIRM_TIMEOUT = 60.0
15945 def _NotifyUsingSocket(cls, cb, errcls):
15946 """Opens a Unix socket and waits for another program to connect.
15949 @param cb: Callback to send socket name to client
15950 @type errcls: class
15951 @param errcls: Exception class to use for errors
15954 # Using a temporary directory as there's no easy way to create temporary
15955 # sockets without writing a custom loop around tempfile.mktemp and
15957 tmpdir = tempfile.mkdtemp()
15959 tmpsock = utils.PathJoin(tmpdir, "sock")
15961 logging.debug("Creating temporary socket at %s", tmpsock)
15962 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15967 # Send details to client
15970 # Wait for client to connect before continuing
15971 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15973 (conn, _) = sock.accept()
15974 except socket.error, err:
15975 raise errcls("Client didn't connect in time (%s)" % err)
15979 # Remove as soon as client is connected
15980 shutil.rmtree(tmpdir)
15982 # Wait for client to close
15985 # pylint: disable=E1101
15986 # Instance of '_socketobject' has no ... member
15987 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15989 except socket.error, err:
15990 raise errcls("Client failed to confirm notification (%s)" % err)
15994 def _SendNotification(self, test, arg, sockname):
15995 """Sends a notification to the client.
15998 @param test: Test name
15999 @param arg: Test argument (depends on test)
16000 @type sockname: string
16001 @param sockname: Socket path
16004 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16006 def _Notify(self, prereq, test, arg):
16007 """Notifies the client of a test.
16010 @param prereq: Whether this is a prereq-phase test
16012 @param test: Test name
16013 @param arg: Test argument (depends on test)
16017 errcls = errors.OpPrereqError
16019 errcls = errors.OpExecError
16021 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16025 def CheckArguments(self):
16026 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16027 self.expandnames_calls = 0
16029 def ExpandNames(self):
16030 checkargs_calls = getattr(self, "checkargs_calls", 0)
16031 if checkargs_calls < 1:
16032 raise errors.ProgrammerError("CheckArguments was not called")
16034 self.expandnames_calls += 1
16036 if self.op.notify_waitlock:
16037 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16039 self.LogInfo("Expanding names")
16041 # Get lock on master node (just to get a lock, not for a particular reason)
16042 self.needed_locks = {
16043 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16046 def Exec(self, feedback_fn):
16047 if self.expandnames_calls < 1:
16048 raise errors.ProgrammerError("ExpandNames was not called")
16050 if self.op.notify_exec:
16051 self._Notify(False, constants.JQT_EXEC, None)
16053 self.LogInfo("Executing")
16055 if self.op.log_messages:
16056 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16057 for idx, msg in enumerate(self.op.log_messages):
16058 self.LogInfo("Sending log message %s", idx + 1)
16059 feedback_fn(constants.JQT_MSGPREFIX + msg)
16060 # Report how many test messages have been sent
16061 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16064 raise errors.OpExecError("Opcode failure was requested")
16069 class LUTestAllocator(NoHooksLU):
16070 """Run allocator tests.
16072 This LU runs the allocator tests
16075 def CheckPrereq(self):
16076 """Check prerequisites.
16078 This checks the opcode parameters depending on the direction and mode of the test.
16081 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16082 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16083 for attr in ["memory", "disks", "disk_template",
16084 "os", "tags", "nics", "vcpus"]:
16085 if not hasattr(self.op, attr):
16086 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16087 attr, errors.ECODE_INVAL)
16088 iname = self.cfg.ExpandInstanceName(self.op.name)
16089 if iname is not None:
16090 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16091 iname, errors.ECODE_EXISTS)
16092 if not isinstance(self.op.nics, list):
16093 raise errors.OpPrereqError("Invalid parameter 'nics'",
16094 errors.ECODE_INVAL)
16095 if not isinstance(self.op.disks, list):
16096 raise errors.OpPrereqError("Invalid parameter 'disks'",
16097 errors.ECODE_INVAL)
16098 for row in self.op.disks:
16099 if (not isinstance(row, dict) or
16100 constants.IDISK_SIZE not in row or
16101 not isinstance(row[constants.IDISK_SIZE], int) or
16102 constants.IDISK_MODE not in row or
16103 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16104 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16105 " parameter", errors.ECODE_INVAL)
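# Illustrative example of a disk specification accepted by the check above
# (added; the values are hypothetical):
#   {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR}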
16106 if self.op.hypervisor is None:
16107 self.op.hypervisor = self.cfg.GetHypervisorType()
16108 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16109 fname = _ExpandInstanceName(self.cfg, self.op.name)
16110 self.op.name = fname
16111 self.relocate_from = \
16112 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16113 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16114 constants.IALLOCATOR_MODE_NODE_EVAC):
16115 if not self.op.instances:
16116 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16117 self.op.instances = _GetWantedInstances(self, self.op.instances)
16119 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16120 self.op.mode, errors.ECODE_INVAL)
16122 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16123 if self.op.iallocator is None:
16124 raise errors.OpPrereqError("Missing allocator name",
16125 errors.ECODE_INVAL)
16126 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16127 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16128 self.op.direction, errors.ECODE_INVAL)
16130 def Exec(self, feedback_fn):
16131 """Run the allocator test.
16134 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16135 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16136 memory=self.op.memory,
16137 disks=self.op.disks,
16138 disk_template=self.op.disk_template,
16142 vcpus=self.op.vcpus,
16143 spindle_use=self.op.spindle_use,
16144 hypervisor=self.op.hypervisor)
16145 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16146 req = iallocator.IAReqRelocate(name=self.op.name,
16147 relocate_from=list(self.relocate_from))
16148 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16149 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16150 target_groups=self.op.target_groups)
16151 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16152 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16153 evac_mode=self.op.evac_mode)
16154 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16155 disk_template = self.op.disk_template
16156 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16157 memory=self.op.memory,
16158 disks=self.op.disks,
16159 disk_template=disk_template,
16163 vcpus=self.op.vcpus,
16164 spindle_use=self.op.spindle_use,
16165 hypervisor=self.op.hypervisor)
16166 for idx in range(self.op.count)]
16167 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16169 raise errors.ProgrammerError("Unhandled mode %s in"
16170 " LUTestAllocator.Exec", self.op.mode)
16172 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16173 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16174 result = ial.in_text
16176 ial.Run(self.op.iallocator, validate=False)
16177 result = ial.out_text
16181 class LUNetworkAdd(LogicalUnit):
16182 """Logical unit for creating networks.
16185 HPATH = "network-add"
16186 HTYPE = constants.HTYPE_NETWORK
16189 def BuildHooksNodes(self):
16190 """Build hooks nodes.
16193 mn = self.cfg.GetMasterNode()
16194 return ([mn], [mn])
16196 def CheckArguments(self):
16197 if self.op.mac_prefix:
16198 self.op.mac_prefix = \
16199 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16201 def ExpandNames(self):
16202 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16204 if self.op.conflicts_check:
16205 self.share_locks[locking.LEVEL_NODE] = 1
16206 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16207 self.needed_locks = {
16208 locking.LEVEL_NODE: locking.ALL_SET,
16209 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16212 self.needed_locks = {}
16214 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16216 def CheckPrereq(self):
16217 if self.op.network is None:
16218 raise errors.OpPrereqError("Network must be given",
16219 errors.ECODE_INVAL)
16222 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16223 except errors.OpPrereqError:
16226 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16227 " network (UUID: %s)" %
16228 (self.op.network_name, existing_uuid),
16229 errors.ECODE_EXISTS)
16231 # Check tag validity
16232 for tag in self.op.tags:
16233 objects.TaggableObject.ValidateTag(tag)
16235 def BuildHooksEnv(self):
16236 """Build hooks env.
16240 "name": self.op.network_name,
16241 "subnet": self.op.network,
16242 "gateway": self.op.gateway,
16243 "network6": self.op.network6,
16244 "gateway6": self.op.gateway6,
16245 "mac_prefix": self.op.mac_prefix,
16246 "tags": self.op.tags,
16248 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16250 def Exec(self, feedback_fn):
16251 """Add the ip pool to the cluster.
16254 nobj = objects.Network(name=self.op.network_name,
16255 network=self.op.network,
16256 gateway=self.op.gateway,
16257 network6=self.op.network6,
16258 gateway6=self.op.gateway6,
16259 mac_prefix=self.op.mac_prefix,
16260 uuid=self.network_uuid)
16261 # Initialize the associated address pool
16263 pool = network.AddressPool.InitializeNetwork(nobj)
16264 except errors.AddressPoolError, err:
16265 raise errors.OpExecError("Cannot create IP address pool for network"
16266 " '%s': %s" % (self.op.network_name, err))
16268 # Check if we need to reserve the nodes and the cluster master IP
16269 # These may not be allocated to any instances in routed mode, as
16270 # they wouldn't function anyway.
16271 if self.op.conflicts_check:
16272 for node in self.cfg.GetAllNodesInfo().values():
16273 for ip in [node.primary_ip, node.secondary_ip]:
16275 if pool.Contains(ip):
16277 self.LogInfo("Reserved IP address of node '%s' (%s)",
16279 except errors.AddressPoolError, err:
16280 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16281 ip, node.name, err)
16283 master_ip = self.cfg.GetClusterInfo().master_ip
16285 if pool.Contains(master_ip):
16286 pool.Reserve(master_ip)
16287 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16288 except errors.AddressPoolError, err:
16289 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16292 if self.op.add_reserved_ips:
16293 for ip in self.op.add_reserved_ips:
16295 pool.Reserve(ip, external=True)
16296 except errors.AddressPoolError, err:
16297 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16301 for tag in self.op.tags:
16304 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16305 del self.remove_locks[locking.LEVEL_NETWORK]
16308 class LUNetworkRemove(LogicalUnit):
16309 HPATH = "network-remove"
16310 HTYPE = constants.HTYPE_NETWORK
16313 def ExpandNames(self):
16314 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16316 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16317 self.needed_locks = {
16318 locking.LEVEL_NETWORK: [self.network_uuid],
16319 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16322 def CheckPrereq(self):
16323 """Check prerequisites.
16325 This checks that the given network name exists as a network and that it
16326 is not connected to any node group.
16330 # Verify that the network is not connected to any node group.
16331 node_groups = [group.name
16332 for group in self.cfg.GetAllNodeGroupsInfo().values()
16333 if self.network_uuid in group.networks]
16336 self.LogWarning("Network '%s' is connected to the following"
16337 " node groups: %s" %
16338 (self.op.network_name,
16339 utils.CommaJoin(utils.NiceSort(node_groups))))
16340 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16342 def BuildHooksEnv(self):
16343 """Build hooks env.
16347 "NETWORK_NAME": self.op.network_name,
16350 def BuildHooksNodes(self):
16351 """Build hooks nodes.
16354 mn = self.cfg.GetMasterNode()
16355 return ([mn], [mn])
16357 def Exec(self, feedback_fn):
16358 """Remove the network.
16362 self.cfg.RemoveNetwork(self.network_uuid)
16363 except errors.ConfigurationError:
16364 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16365 (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
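    # The external=True reservations below mark addresses as administratively
    # reserved, which the address pool appears to track separately from
    # addresses handed out to instance NICs (see network.AddressPool).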
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6
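    # Sanity-check the resulting pool before the modified network object is
    # written back to the configuration (assumption: Validate() complains if
    # the reservation maps no longer match the network definition).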
    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
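    # Requested names are translated to UUIDs here, since networks are stored
    # and locked by UUID while users refer to them by name.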
    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET
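    # Instance and node group locks are only needed if the corresponding usage
    # information (NETQ_INST/NETQ_GROUP fields) was actually requested.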
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
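    # Summarize the pool for the stats fields: free/reserved address counts,
    # a textual map of the pool (GetMap() is assumed to mark reserved vs. free
    # addresses, e.g. as "X"/"." characters), and the externally reserved IPs.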
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning a boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
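  # (Instance locks were acquired optimistically in DeclareLocks; the call
  # above re-checks the group's instance list now that the group lock is held
  # and fails if the set of instances changed in the meantime.)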
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
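  # Illustrative example: [(0, "192.0.2.10"), (2, "192.0.2.12")] is rendered
  # as "nic0/192.0.2.10, nic2/192.0.2.12".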
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                          "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
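# The assertion below keeps this mapping in sync with the constants: every
# resource that is queryable through an opcode (constants.QR_VIA_OP) must have
# an implementation here, and nothing else may be listed.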

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


def _CheckForConflictingIp(lu, ip, node):
  """Raise an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
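  # CheckIPInNodeGroup returns a pair whose first element identifies the
  # network already using this IP within the node's group (None when the
  # address is not in use); the second element is not needed here.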
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)